/*
 * Copyright 2014 Advanced Micro Devices, Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 */

#include <linux/device.h>
#include <linux/export.h>
#include <linux/err.h>
#include <linux/fs.h>
#include <linux/file.h>
#include <linux/sched.h>
#include <linux/slab.h>
#include <linux/uaccess.h>
#include <linux/compat.h>
#include <uapi/linux/kfd_ioctl.h>
#include <linux/time.h>
#include <linux/mm.h>
#include <linux/mman.h>
#include <asm/processor.h>
#include "kfd_priv.h"
#include "kfd_device_queue_manager.h"
#include "kfd_dbgmgr.h"

static long kfd_ioctl(struct file *, unsigned int, unsigned long);
static int kfd_open(struct inode *, struct file *);
static int kfd_mmap(struct file *, struct vm_area_struct *);

static const char kfd_dev_name[] = "kfd";

static const struct file_operations kfd_fops = {
	.owner = THIS_MODULE,
	.unlocked_ioctl = kfd_ioctl,
	.compat_ioctl = kfd_ioctl,
	.open = kfd_open,
	.mmap = kfd_mmap,
};

static int kfd_char_dev_major = -1;
static struct class *kfd_class;
struct device *kfd_device;

int kfd_chardev_init(void)
{
	int err = 0;

	kfd_char_dev_major = register_chrdev(0, kfd_dev_name, &kfd_fops);
	err = kfd_char_dev_major;
	if (err < 0)
		goto err_register_chrdev;

	kfd_class = class_create(THIS_MODULE, kfd_dev_name);
	err = PTR_ERR(kfd_class);
	if (IS_ERR(kfd_class))
		goto err_class_create;

	kfd_device = device_create(kfd_class, NULL,
				   MKDEV(kfd_char_dev_major, 0),
				   NULL, kfd_dev_name);
	err = PTR_ERR(kfd_device);
	if (IS_ERR(kfd_device))
		goto err_device_create;

	return 0;

err_device_create:
	class_destroy(kfd_class);
err_class_create:
	unregister_chrdev(kfd_char_dev_major, kfd_dev_name);
err_register_chrdev:
	return err;
}

void kfd_chardev_exit(void)
{
	device_destroy(kfd_class, MKDEV(kfd_char_dev_major, 0));
	class_destroy(kfd_class);
	unregister_chrdev(kfd_char_dev_major, kfd_dev_name);
}

struct device *kfd_chardev(void)
{
	return kfd_device;
}

static int kfd_open(struct inode *inode, struct file *filep)
{
	struct kfd_process *process;
	bool is_32bit_user_mode;

	if (iminor(inode) != 0)
		return -ENODEV;

	is_32bit_user_mode = in_compat_syscall();

	if (is_32bit_user_mode) {
		dev_warn(kfd_device,
			"Process %d (32-bit) failed to open /dev/kfd\n"
			"32-bit processes are not supported by amdkfd\n",
			current->pid);
		return -EPERM;
	}

	process = kfd_create_process(filep);
	if (IS_ERR(process))
		return PTR_ERR(process);

	dev_dbg(kfd_device, "process %d opened, compat mode (32 bit) - %d\n",
		process->pasid, process->is_32bit_user_mode);

	return 0;
}
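
/*
 * Illustrative usage (not part of the driver): a 64-bit user-space process
 * opens the character device node created above. Names are examples only
 * and error handling is omitted.
 *
 *	int kfd_fd = open("/dev/kfd", O_RDWR | O_CLOEXEC);
 *
 * The open succeeds only for 64-bit callers on minor 0, as enforced by
 * kfd_open() above.
 */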

static int kfd_ioctl_get_version(struct file *filep, struct kfd_process *p,
					void *data)
{
	struct kfd_ioctl_get_version_args *args = data;

	args->major_version = KFD_IOCTL_MAJOR_VERSION;
	args->minor_version = KFD_IOCTL_MINOR_VERSION;

	return 0;
}
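
/*
 * Illustrative usage (not part of the driver): querying the interface
 * version from user space. The ioctl number and argument struct come from
 * uapi/linux/kfd_ioctl.h; kfd_fd is assumed to be an open /dev/kfd handle.
 *
 *	struct kfd_ioctl_get_version_args ver = {0};
 *
 *	if (ioctl(kfd_fd, AMDKFD_IOC_GET_VERSION, &ver) == 0)
 *		printf("KFD ioctl interface %u.%u\n",
 *		       ver.major_version, ver.minor_version);
 */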

static int set_queue_properties_from_user(struct queue_properties *q_properties,
				struct kfd_ioctl_create_queue_args *args)
{
	if (args->queue_percentage > KFD_MAX_QUEUE_PERCENTAGE) {
		pr_err("Queue percentage must be between 0 to KFD_MAX_QUEUE_PERCENTAGE\n");
		return -EINVAL;
	}

	if (args->queue_priority > KFD_MAX_QUEUE_PRIORITY) {
		pr_err("Queue priority must be between 0 to KFD_MAX_QUEUE_PRIORITY\n");
		return -EINVAL;
	}

	if ((args->ring_base_address) &&
		(!access_ok(VERIFY_WRITE,
			(const void __user *) args->ring_base_address,
			sizeof(uint64_t)))) {
		pr_err("Can't access ring base address\n");
		return -EFAULT;
	}

	if (!is_power_of_2(args->ring_size) && (args->ring_size != 0)) {
		pr_err("Ring size must be a power of 2 or 0\n");
		return -EINVAL;
	}

	if (!access_ok(VERIFY_WRITE,
			(const void __user *) args->read_pointer_address,
			sizeof(uint32_t))) {
		pr_err("Can't access read pointer\n");
		return -EFAULT;
	}

	if (!access_ok(VERIFY_WRITE,
			(const void __user *) args->write_pointer_address,
			sizeof(uint32_t))) {
		pr_err("Can't access write pointer\n");
		return -EFAULT;
	}

	if (args->eop_buffer_address &&
		!access_ok(VERIFY_WRITE,
			(const void __user *) args->eop_buffer_address,
			sizeof(uint32_t))) {
		pr_debug("Can't access eop buffer");
		return -EFAULT;
	}

	if (args->ctx_save_restore_address &&
		!access_ok(VERIFY_WRITE,
			(const void __user *) args->ctx_save_restore_address,
			sizeof(uint32_t))) {
		pr_debug("Can't access ctx save restore buffer");
		return -EFAULT;
	}

	q_properties->is_interop = false;
	q_properties->queue_percent = args->queue_percentage;
	q_properties->priority = args->queue_priority;
	q_properties->queue_address = args->ring_base_address;
	q_properties->queue_size = args->ring_size;
	q_properties->read_ptr = (uint32_t *) args->read_pointer_address;
	q_properties->write_ptr = (uint32_t *) args->write_pointer_address;
	q_properties->eop_ring_buffer_address = args->eop_buffer_address;
	q_properties->eop_ring_buffer_size = args->eop_buffer_size;
	q_properties->ctx_save_restore_area_address =
			args->ctx_save_restore_address;
	q_properties->ctx_save_restore_area_size = args->ctx_save_restore_size;
	q_properties->ctl_stack_size = args->ctl_stack_size;
	if (args->queue_type == KFD_IOC_QUEUE_TYPE_COMPUTE ||
		args->queue_type == KFD_IOC_QUEUE_TYPE_COMPUTE_AQL)
		q_properties->type = KFD_QUEUE_TYPE_COMPUTE;
	else if (args->queue_type == KFD_IOC_QUEUE_TYPE_SDMA)
		q_properties->type = KFD_QUEUE_TYPE_SDMA;
	else
		return -ENOTSUPP;

	if (args->queue_type == KFD_IOC_QUEUE_TYPE_COMPUTE_AQL)
		q_properties->format = KFD_QUEUE_FORMAT_AQL;
	else
		q_properties->format = KFD_QUEUE_FORMAT_PM4;

	pr_debug("Queue Percentage: %d, %d\n",
			q_properties->queue_percent, args->queue_percentage);

	pr_debug("Queue Priority: %d, %d\n",
			q_properties->priority, args->queue_priority);

	pr_debug("Queue Address: 0x%llX, 0x%llX\n",
			q_properties->queue_address, args->ring_base_address);

	pr_debug("Queue Size: 0x%llX, %u\n",
			q_properties->queue_size, args->ring_size);

	pr_debug("Queue r/w Pointers: %px, %px\n",
			q_properties->read_ptr,
			q_properties->write_ptr);

	pr_debug("Queue Format: %d\n", q_properties->format);

	pr_debug("Queue EOP: 0x%llX\n", q_properties->eop_ring_buffer_address);

	pr_debug("Queue CTX save area: 0x%llX\n",
			q_properties->ctx_save_restore_area_address);

	return 0;
}

static int kfd_ioctl_create_queue(struct file *filep, struct kfd_process *p,
					void *data)
{
	struct kfd_ioctl_create_queue_args *args = data;
	struct kfd_dev *dev;
	int err = 0;
	unsigned int queue_id;
	struct kfd_process_device *pdd;
	struct queue_properties q_properties;

	memset(&q_properties, 0, sizeof(struct queue_properties));

	pr_debug("Creating queue ioctl\n");

	err = set_queue_properties_from_user(&q_properties, args);
	if (err)
		return err;

	pr_debug("Looking for gpu id 0x%x\n", args->gpu_id);
	dev = kfd_device_by_id(args->gpu_id);
	if (!dev) {
		pr_debug("Could not find gpu id 0x%x\n", args->gpu_id);
		return -EINVAL;
	}

	mutex_lock(&p->mutex);

	pdd = kfd_bind_process_to_device(dev, p);
	if (IS_ERR(pdd)) {
		err = -ESRCH;
		goto err_bind_process;
	}

	pr_debug("Creating queue for PASID %d on gpu 0x%x\n",
			p->pasid,
			dev->id);

	err = pqm_create_queue(&p->pqm, dev, filep, &q_properties, &queue_id);
	if (err != 0)
		goto err_create_queue;

	args->queue_id = queue_id;

	/* Return gpu_id as doorbell offset for mmap usage */
	args->doorbell_offset = KFD_MMAP_TYPE_DOORBELL;
	args->doorbell_offset |= KFD_MMAP_GPU_ID(args->gpu_id);
	args->doorbell_offset <<= PAGE_SHIFT;
	if (KFD_IS_SOC15(dev->device_info->asic_family))
		/* On SOC15 ASICs, doorbell allocation must be
		 * per-device, and independent from the per-process
		 * queue_id. Return the doorbell offset within the
		 * doorbell aperture to user mode.
		 */
		args->doorbell_offset |= q_properties.doorbell_off;

	mutex_unlock(&p->mutex);

	pr_debug("Queue id %d was created successfully\n", args->queue_id);

	pr_debug("Ring buffer address == 0x%016llX\n",
			args->ring_base_address);

	pr_debug("Read ptr address == 0x%016llX\n",
			args->read_pointer_address);

	pr_debug("Write ptr address == 0x%016llX\n",
			args->write_pointer_address);

	return 0;

err_create_queue:
err_bind_process:
	mutex_unlock(&p->mutex);
	return err;
}
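
/*
 * Illustrative usage (not part of the driver): mapping the doorbell page for
 * a queue created above. The doorbell_offset returned by the ioctl already
 * encodes KFD_MMAP_TYPE_DOORBELL and the GPU ID (see kfd_mmap() at the end
 * of this file, which decodes it), so user space passes it verbatim as the
 * mmap offset. Variable names are examples only; doorbell_page_size is
 * assumed to come from topology information, and error handling is omitted.
 *
 *	struct kfd_ioctl_create_queue_args q = { ... };
 *
 *	ioctl(kfd_fd, AMDKFD_IOC_CREATE_QUEUE, &q);
 *	doorbells = mmap(NULL, doorbell_page_size, PROT_READ | PROT_WRITE,
 *			 MAP_SHARED, kfd_fd, q.doorbell_offset);
 */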

static int kfd_ioctl_destroy_queue(struct file *filp, struct kfd_process *p,
					void *data)
{
	int retval;
	struct kfd_ioctl_destroy_queue_args *args = data;

	pr_debug("Destroying queue id %d for pasid %d\n",
				args->queue_id,
				p->pasid);

	mutex_lock(&p->mutex);

	retval = pqm_destroy_queue(&p->pqm, args->queue_id);

	mutex_unlock(&p->mutex);
	return retval;
}

static int kfd_ioctl_update_queue(struct file *filp, struct kfd_process *p,
					void *data)
{
	int retval;
	struct kfd_ioctl_update_queue_args *args = data;
	struct queue_properties properties;

	if (args->queue_percentage > KFD_MAX_QUEUE_PERCENTAGE) {
		pr_err("Queue percentage must be between 0 to KFD_MAX_QUEUE_PERCENTAGE\n");
		return -EINVAL;
	}

	if (args->queue_priority > KFD_MAX_QUEUE_PRIORITY) {
		pr_err("Queue priority must be between 0 to KFD_MAX_QUEUE_PRIORITY\n");
		return -EINVAL;
	}

	if ((args->ring_base_address) &&
		(!access_ok(VERIFY_WRITE,
			(const void __user *) args->ring_base_address,
			sizeof(uint64_t)))) {
		pr_err("Can't access ring base address\n");
		return -EFAULT;
	}

	if (!is_power_of_2(args->ring_size) && (args->ring_size != 0)) {
		pr_err("Ring size must be a power of 2 or 0\n");
		return -EINVAL;
	}

	properties.queue_address = args->ring_base_address;
	properties.queue_size = args->ring_size;
	properties.queue_percent = args->queue_percentage;
	properties.priority = args->queue_priority;

	pr_debug("Updating queue id %d for pasid %d\n",
			args->queue_id, p->pasid);

	mutex_lock(&p->mutex);

	retval = pqm_update_queue(&p->pqm, args->queue_id, &properties);

	mutex_unlock(&p->mutex);

	return retval;
}

static int kfd_ioctl_set_memory_policy(struct file *filep,
					struct kfd_process *p, void *data)
{
	struct kfd_ioctl_set_memory_policy_args *args = data;
	struct kfd_dev *dev;
	int err = 0;
	struct kfd_process_device *pdd;
	enum cache_policy default_policy, alternate_policy;

	if (args->default_policy != KFD_IOC_CACHE_POLICY_COHERENT
	    && args->default_policy != KFD_IOC_CACHE_POLICY_NONCOHERENT) {
		return -EINVAL;
	}

	if (args->alternate_policy != KFD_IOC_CACHE_POLICY_COHERENT
	    && args->alternate_policy != KFD_IOC_CACHE_POLICY_NONCOHERENT) {
		return -EINVAL;
	}

	dev = kfd_device_by_id(args->gpu_id);
	if (!dev)
		return -EINVAL;

	mutex_lock(&p->mutex);

	pdd = kfd_bind_process_to_device(dev, p);
	if (IS_ERR(pdd)) {
		err = -ESRCH;
		goto out;
	}

	default_policy = (args->default_policy == KFD_IOC_CACHE_POLICY_COHERENT)
			 ? cache_policy_coherent : cache_policy_noncoherent;

	alternate_policy =
		(args->alternate_policy == KFD_IOC_CACHE_POLICY_COHERENT)
		   ? cache_policy_coherent : cache_policy_noncoherent;

	if (!dev->dqm->ops.set_cache_memory_policy(dev->dqm,
				&pdd->qpd,
				default_policy,
				alternate_policy,
				(void __user *)args->alternate_aperture_base,
				args->alternate_aperture_size))
		err = -EINVAL;

out:
	mutex_unlock(&p->mutex);

	return err;
}

static int kfd_ioctl_set_trap_handler(struct file *filep,
					struct kfd_process *p, void *data)
{
	struct kfd_ioctl_set_trap_handler_args *args = data;
	struct kfd_dev *dev;
	int err = 0;
	struct kfd_process_device *pdd;

	dev = kfd_device_by_id(args->gpu_id);
	if (dev == NULL)
		return -EINVAL;

	mutex_lock(&p->mutex);

	pdd = kfd_bind_process_to_device(dev, p);
	if (IS_ERR(pdd)) {
		err = -ESRCH;
		goto out;
	}

	if (dev->dqm->ops.set_trap_handler(dev->dqm,
					&pdd->qpd,
					args->tba_addr,
					args->tma_addr))
		err = -EINVAL;

out:
	mutex_unlock(&p->mutex);

	return err;
}

static int kfd_ioctl_dbg_register(struct file *filep,
				struct kfd_process *p, void *data)
{
	struct kfd_ioctl_dbg_register_args *args = data;
	struct kfd_dev *dev;
	struct kfd_dbgmgr *dbgmgr_ptr;
	struct kfd_process_device *pdd;
	bool create_ok;
	long status = 0;

	dev = kfd_device_by_id(args->gpu_id);
	if (!dev)
		return -EINVAL;

	if (dev->device_info->asic_family == CHIP_CARRIZO) {
		pr_debug("kfd_ioctl_dbg_register not supported on CZ\n");
		return -EINVAL;
	}

	mutex_lock(&p->mutex);
	mutex_lock(kfd_get_dbgmgr_mutex());

	/*
	 * Make sure we have a pdd, in case this is the first queue created
	 * for this process
	 */
	pdd = kfd_bind_process_to_device(dev, p);
	if (IS_ERR(pdd)) {
		status = PTR_ERR(pdd);
		goto out;
	}

	if (!dev->dbgmgr) {
		/* In case of a legal call, we have no dbgmgr yet */
		create_ok = kfd_dbgmgr_create(&dbgmgr_ptr, dev);
		if (create_ok) {
			status = kfd_dbgmgr_register(dbgmgr_ptr, p);
			if (status != 0)
				kfd_dbgmgr_destroy(dbgmgr_ptr);
			else
				dev->dbgmgr = dbgmgr_ptr;
		}
	} else {
		pr_debug("debugger already registered\n");
		status = -EINVAL;
	}

out:
	mutex_unlock(kfd_get_dbgmgr_mutex());
	mutex_unlock(&p->mutex);

	return status;
}

static int kfd_ioctl_dbg_unregister(struct file *filep,
				struct kfd_process *p, void *data)
{
	struct kfd_ioctl_dbg_unregister_args *args = data;
	struct kfd_dev *dev;
	long status;

	dev = kfd_device_by_id(args->gpu_id);
	if (!dev || !dev->dbgmgr)
		return -EINVAL;

	if (dev->device_info->asic_family == CHIP_CARRIZO) {
		pr_debug("kfd_ioctl_dbg_unregister not supported on CZ\n");
		return -EINVAL;
	}

	mutex_lock(kfd_get_dbgmgr_mutex());

	status = kfd_dbgmgr_unregister(dev->dbgmgr, p);
	if (!status) {
		kfd_dbgmgr_destroy(dev->dbgmgr);
		dev->dbgmgr = NULL;
	}

	mutex_unlock(kfd_get_dbgmgr_mutex());

	return status;
}

/*
 * Parse and generate a variable-size data structure for address watch.
 * The total buffer size and the number of watch points are limited in
 * order to prevent kernel abuse. (This has no bearing on the much smaller
 * HW limitation, which is enforced by the dbgdev module.)
 * Also note that the watch addresses themselves are not "copied from user",
 * since they are programmed into the HW as user-mode values.
 */
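
/*
 * For reference, the parsing below expects the buffer copied from
 * args->content_ptr (args->buf_size_in_bytes minus sizeof(*args) bytes) to
 * be packed as:
 *
 *	uint32_t                  num_watch_points;
 *	enum HSA_DBG_WATCH_MODE   watch_mode[num_watch_points];
 *	uint64_t                  watch_address[num_watch_points];
 *	uint64_t                  watch_mask[num_watch_points];  (optional;
 *	                          a single zero entry means "no masks")
 *
 * This is a descriptive sketch derived from the parsing code, not a uapi
 * definition.
 */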

static int kfd_ioctl_dbg_address_watch(struct file *filep,
					struct kfd_process *p, void *data)
{
	struct kfd_ioctl_dbg_address_watch_args *args = data;
	struct kfd_dev *dev;
	struct dbg_address_watch_info aw_info;
	unsigned char *args_buff;
	long status;
	void __user *cmd_from_user;
	uint64_t watch_mask_value = 0;
	unsigned int args_idx = 0;

	memset((void *) &aw_info, 0, sizeof(struct dbg_address_watch_info));

	dev = kfd_device_by_id(args->gpu_id);
	if (!dev)
		return -EINVAL;

	if (dev->device_info->asic_family == CHIP_CARRIZO) {
		pr_debug("kfd_ioctl_dbg_wave_control not supported on CZ\n");
		return -EINVAL;
	}

	cmd_from_user = (void __user *) args->content_ptr;

	/* Validate arguments */

	if ((args->buf_size_in_bytes > MAX_ALLOWED_AW_BUFF_SIZE) ||
		(args->buf_size_in_bytes <= sizeof(*args) + sizeof(int) * 2) ||
		(cmd_from_user == NULL))
		return -EINVAL;

	/* this is the actual buffer to work with */
	args_buff = memdup_user(cmd_from_user,
				args->buf_size_in_bytes - sizeof(*args));
	if (IS_ERR(args_buff))
		return PTR_ERR(args_buff);

	aw_info.process = p;

	aw_info.num_watch_points = *((uint32_t *)(&args_buff[args_idx]));
	args_idx += sizeof(aw_info.num_watch_points);

	aw_info.watch_mode = (enum HSA_DBG_WATCH_MODE *) &args_buff[args_idx];
	args_idx += sizeof(enum HSA_DBG_WATCH_MODE) * aw_info.num_watch_points;

	/*
	 * set watch address base pointer to point on the array base
	 * within args_buff
	 */
	aw_info.watch_address = (uint64_t *) &args_buff[args_idx];

	/* skip over the addresses buffer */
	args_idx += sizeof(aw_info.watch_address) * aw_info.num_watch_points;

	if (args_idx >= args->buf_size_in_bytes - sizeof(*args)) {
		status = -EINVAL;
		goto out;
	}

	watch_mask_value = (uint64_t) args_buff[args_idx];

	if (watch_mask_value > 0) {
		/*
		 * There is an array of masks.
		 * set watch mask base pointer to point on the array base
		 * within args_buff
		 */
		aw_info.watch_mask = (uint64_t *) &args_buff[args_idx];

		/* skip over the masks buffer */
		args_idx += sizeof(aw_info.watch_mask) *
				aw_info.num_watch_points;
	} else {
		/* just the NULL mask, set to NULL and skip over it */
		aw_info.watch_mask = NULL;
		args_idx += sizeof(aw_info.watch_mask);
	}

	if (args_idx >= args->buf_size_in_bytes - sizeof(args)) {
		status = -EINVAL;
		goto out;
	}

	/* Currently HSA Event is not supported for DBG */
	aw_info.watch_event = NULL;

	mutex_lock(kfd_get_dbgmgr_mutex());

	status = kfd_dbgmgr_address_watch(dev->dbgmgr, &aw_info);

	mutex_unlock(kfd_get_dbgmgr_mutex());

out:
	kfree(args_buff);

	return status;
}

/* Parse and generate fixed size data structure for wave control */
static int kfd_ioctl_dbg_wave_control(struct file *filep,
					struct kfd_process *p, void *data)
{
	struct kfd_ioctl_dbg_wave_control_args *args = data;
	struct kfd_dev *dev;
	struct dbg_wave_control_info wac_info;
	unsigned char *args_buff;
	uint32_t computed_buff_size;
	long status;
	void __user *cmd_from_user;
	unsigned int args_idx = 0;

	memset((void *) &wac_info, 0, sizeof(struct dbg_wave_control_info));

	/* we use compact form, independent of the packing attribute value */
	computed_buff_size = sizeof(*args) +
				sizeof(wac_info.mode) +
				sizeof(wac_info.operand) +
				sizeof(wac_info.dbgWave_msg.DbgWaveMsg) +
				sizeof(wac_info.dbgWave_msg.MemoryVA) +
				sizeof(wac_info.trapId);

	dev = kfd_device_by_id(args->gpu_id);
	if (!dev)
		return -EINVAL;

	if (dev->device_info->asic_family == CHIP_CARRIZO) {
		pr_debug("kfd_ioctl_dbg_wave_control not supported on CZ\n");
		return -EINVAL;
	}

	/* input size must match the computed "compact" size */
	if (args->buf_size_in_bytes != computed_buff_size) {
		pr_debug("size mismatch, computed : actual %u : %u\n",
				args->buf_size_in_bytes, computed_buff_size);
		return -EINVAL;
	}

	cmd_from_user = (void __user *) args->content_ptr;

	if (cmd_from_user == NULL)
		return -EINVAL;

	/* copy the entire buffer from user */

	args_buff = memdup_user(cmd_from_user,
				args->buf_size_in_bytes - sizeof(*args));
	if (IS_ERR(args_buff))
		return PTR_ERR(args_buff);

	/* move ptr to the start of the "pay-load" area */
	wac_info.process = p;

	wac_info.operand = *((enum HSA_DBG_WAVEOP *)(&args_buff[args_idx]));
	args_idx += sizeof(wac_info.operand);

	wac_info.mode = *((enum HSA_DBG_WAVEMODE *)(&args_buff[args_idx]));
	args_idx += sizeof(wac_info.mode);

	wac_info.trapId = *((uint32_t *)(&args_buff[args_idx]));
	args_idx += sizeof(wac_info.trapId);

	wac_info.dbgWave_msg.DbgWaveMsg.WaveMsgInfoGen2.Value =
					*((uint32_t *)(&args_buff[args_idx]));
	wac_info.dbgWave_msg.MemoryVA = NULL;

	mutex_lock(kfd_get_dbgmgr_mutex());

	pr_debug("Calling dbg manager process %p, operand %u, mode %u, trapId %u, message %u\n",
			wac_info.process, wac_info.operand,
			wac_info.mode, wac_info.trapId,
			wac_info.dbgWave_msg.DbgWaveMsg.WaveMsgInfoGen2.Value);

	status = kfd_dbgmgr_wave_control(dev->dbgmgr, &wac_info);

	pr_debug("Returned status of dbg manager is %ld\n", status);

	mutex_unlock(kfd_get_dbgmgr_mutex());

	kfree(args_buff);

	return status;
}
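
/*
 * The three counters returned by the ioctl below come from different time
 * bases: the GPU counter is read through the KGD interface, the CPU counter
 * uses the raw monotonic clock, and the system counter uses the boot-time
 * clock. Since the latter two are reported in nanoseconds, the frequency is
 * fixed at 1 GHz. This note only summarizes the code below; it does not
 * define additional uapi semantics.
 */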

static int kfd_ioctl_get_clock_counters(struct file *filep,
				struct kfd_process *p, void *data)
{
	struct kfd_ioctl_get_clock_counters_args *args = data;
	struct kfd_dev *dev;
	struct timespec64 time;

	dev = kfd_device_by_id(args->gpu_id);
	if (dev)
		/* Reading GPU clock counter from KGD */
		args->gpu_clock_counter =
			dev->kfd2kgd->get_gpu_clock_counter(dev->kgd);
	else
		/* Node without GPU resource */
		args->gpu_clock_counter = 0;

	/* No access to rdtsc. Using raw monotonic time */
	getrawmonotonic64(&time);
	args->cpu_clock_counter = (uint64_t)timespec64_to_ns(&time);

	get_monotonic_boottime64(&time);
	args->system_clock_counter = (uint64_t)timespec64_to_ns(&time);

	/* Since the counter is in nano-seconds we use 1GHz frequency */
	args->system_clock_freq = 1000000000;

	return 0;
}

static int kfd_ioctl_get_process_apertures(struct file *filp,
				struct kfd_process *p, void *data)
{
	struct kfd_ioctl_get_process_apertures_args *args = data;
	struct kfd_process_device_apertures *pAperture;
	struct kfd_process_device *pdd;

	dev_dbg(kfd_device, "get apertures for PASID %d", p->pasid);

	args->num_of_nodes = 0;

	mutex_lock(&p->mutex);

	/* if the process-device list isn't empty */
	if (kfd_has_process_device_data(p)) {
		/* Run over all pdd of the process */
		pdd = kfd_get_first_process_device_data(p);
		do {
			pAperture =
				&args->process_apertures[args->num_of_nodes];
			pAperture->gpu_id = pdd->dev->id;
			pAperture->lds_base = pdd->lds_base;
			pAperture->lds_limit = pdd->lds_limit;
			pAperture->gpuvm_base = pdd->gpuvm_base;
			pAperture->gpuvm_limit = pdd->gpuvm_limit;
			pAperture->scratch_base = pdd->scratch_base;
			pAperture->scratch_limit = pdd->scratch_limit;

			dev_dbg(kfd_device,
				"node id %u\n", args->num_of_nodes);
			dev_dbg(kfd_device,
				"gpu id %u\n", pdd->dev->id);
			dev_dbg(kfd_device,
				"lds_base %llX\n", pdd->lds_base);
			dev_dbg(kfd_device,
				"lds_limit %llX\n", pdd->lds_limit);
			dev_dbg(kfd_device,
				"gpuvm_base %llX\n", pdd->gpuvm_base);
			dev_dbg(kfd_device,
				"gpuvm_limit %llX\n", pdd->gpuvm_limit);
			dev_dbg(kfd_device,
				"scratch_base %llX\n", pdd->scratch_base);
			dev_dbg(kfd_device,
				"scratch_limit %llX\n", pdd->scratch_limit);

			args->num_of_nodes++;

			pdd = kfd_get_next_process_device_data(p, pdd);
		} while (pdd && (args->num_of_nodes < NUM_OF_SUPPORTED_GPUS));
	}

	mutex_unlock(&p->mutex);

	return 0;
}
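
/*
 * Illustrative usage (not part of the driver): the "new" apertures ioctl
 * below is designed to be called twice. A first call with num_of_nodes == 0
 * only reports how many nodes exist; user space then allocates
 * num_of_nodes * sizeof(struct kfd_process_device_apertures) and calls again
 * with kfd_process_device_apertures_ptr pointing at that buffer. Variable
 * names are examples only and error handling is omitted.
 *
 *	struct kfd_ioctl_get_process_apertures_new_args a = {0};
 *	struct kfd_process_device_apertures *bufs;
 *
 *	ioctl(kfd_fd, AMDKFD_IOC_GET_PROCESS_APERTURES_NEW, &a);
 *	bufs = calloc(a.num_of_nodes, sizeof(*bufs));
 *	a.kfd_process_device_apertures_ptr = (uintptr_t)bufs;
 *	ioctl(kfd_fd, AMDKFD_IOC_GET_PROCESS_APERTURES_NEW, &a);
 */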

static int kfd_ioctl_get_process_apertures_new(struct file *filp,
				struct kfd_process *p, void *data)
{
	struct kfd_ioctl_get_process_apertures_new_args *args = data;
	struct kfd_process_device_apertures *pa;
	struct kfd_process_device *pdd;
	uint32_t nodes = 0;
	int ret;

	dev_dbg(kfd_device, "get apertures for PASID %d", p->pasid);

	if (args->num_of_nodes == 0) {
		/* Return number of nodes, so that user space can allocate
		 * sufficient memory
		 */
		mutex_lock(&p->mutex);

		if (!kfd_has_process_device_data(p))
			goto out_unlock;

		/* Run over all pdd of the process */
		pdd = kfd_get_first_process_device_data(p);
		do {
			args->num_of_nodes++;
			pdd = kfd_get_next_process_device_data(p, pdd);
		} while (pdd);

		goto out_unlock;
	}

	/* Fill in process-aperture information for all available
	 * nodes, but not more than args->num_of_nodes as that is
	 * the amount of memory allocated by user
	 */
	pa = kzalloc((sizeof(struct kfd_process_device_apertures) *
			args->num_of_nodes), GFP_KERNEL);
	if (!pa)
		return -ENOMEM;

	mutex_lock(&p->mutex);

	if (!kfd_has_process_device_data(p)) {
		args->num_of_nodes = 0;
		kfree(pa);
		goto out_unlock;
	}

	/* Run over all pdd of the process */
	pdd = kfd_get_first_process_device_data(p);
	do {
		pa[nodes].gpu_id = pdd->dev->id;
		pa[nodes].lds_base = pdd->lds_base;
		pa[nodes].lds_limit = pdd->lds_limit;
		pa[nodes].gpuvm_base = pdd->gpuvm_base;
		pa[nodes].gpuvm_limit = pdd->gpuvm_limit;
		pa[nodes].scratch_base = pdd->scratch_base;
		pa[nodes].scratch_limit = pdd->scratch_limit;

		dev_dbg(kfd_device,
			"gpu id %u\n", pdd->dev->id);
		dev_dbg(kfd_device,
			"lds_base %llX\n", pdd->lds_base);
		dev_dbg(kfd_device,
			"lds_limit %llX\n", pdd->lds_limit);
		dev_dbg(kfd_device,
			"gpuvm_base %llX\n", pdd->gpuvm_base);
		dev_dbg(kfd_device,
			"gpuvm_limit %llX\n", pdd->gpuvm_limit);
		dev_dbg(kfd_device,
			"scratch_base %llX\n", pdd->scratch_base);
		dev_dbg(kfd_device,
			"scratch_limit %llX\n", pdd->scratch_limit);
		nodes++;

		pdd = kfd_get_next_process_device_data(p, pdd);
	} while (pdd && (nodes < args->num_of_nodes));
	mutex_unlock(&p->mutex);

	args->num_of_nodes = nodes;
	ret = copy_to_user(
			(void __user *)args->kfd_process_device_apertures_ptr,
			pa,
			(nodes * sizeof(struct kfd_process_device_apertures)));
	kfree(pa);
	return ret ? -EFAULT : 0;

out_unlock:
	mutex_unlock(&p->mutex);
	return 0;
}
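
/*
 * Illustrative usage (not part of the driver): creating a signal event and
 * waiting on it. The constant and field names come from
 * uapi/linux/kfd_ioctl.h; the struct kfd_event_data array expected by the
 * wait ioctl is assumed from that header. Error handling is omitted.
 *
 *	struct kfd_ioctl_create_event_args ev = {
 *		.event_type = KFD_IOC_EVENT_SIGNAL,
 *		.auto_reset = 1,
 *	};
 *	struct kfd_ioctl_wait_events_args wait = {0};
 *	struct kfd_event_data data = {0};
 *
 *	ioctl(kfd_fd, AMDKFD_IOC_CREATE_EVENT, &ev);
 *	data.event_id = ev.event_id;
 *	wait.events_ptr = (uintptr_t)&data;
 *	wait.num_events = 1;
 *	wait.wait_for_all = 1;
 *	wait.timeout = 1000;
 *	ioctl(kfd_fd, AMDKFD_IOC_WAIT_EVENTS, &wait);
 */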

static int kfd_ioctl_create_event(struct file *filp, struct kfd_process *p,
				void *data)
{
	struct kfd_ioctl_create_event_args *args = data;
	int err;

	/* For dGPUs the event page is allocated in user mode. The
	 * handle is passed to KFD with the first call to this IOCTL
	 * through the event_page_offset field.
	 */
	if (args->event_page_offset) {
		struct kfd_dev *kfd;
		struct kfd_process_device *pdd;
		void *mem, *kern_addr;
		uint64_t size;

		if (p->signal_page) {
			pr_err("Event page is already set\n");
			return -EINVAL;
		}

		kfd = kfd_device_by_id(GET_GPU_ID(args->event_page_offset));
		if (!kfd) {
			pr_err("Getting device by id failed in %s\n", __func__);
			return -EINVAL;
		}

		mutex_lock(&p->mutex);
		pdd = kfd_bind_process_to_device(kfd, p);
		if (IS_ERR(pdd)) {
			err = PTR_ERR(pdd);
			goto out_unlock;
		}

		mem = kfd_process_device_translate_handle(pdd,
				GET_IDR_HANDLE(args->event_page_offset));
		if (!mem) {
			pr_err("Can't find BO, offset is 0x%llx\n",
				args->event_page_offset);
			err = -EINVAL;
			goto out_unlock;
		}
		mutex_unlock(&p->mutex);

		err = kfd->kfd2kgd->map_gtt_bo_to_kernel(kfd->kgd,
				mem, &kern_addr, &size);
		if (err) {
			pr_err("Failed to map event page to kernel\n");
			return err;
		}

		err = kfd_event_page_set(p, kern_addr, size);
		if (err) {
			pr_err("Failed to set event page\n");
			return err;
		}
	}

	err = kfd_event_create(filp, p, args->event_type,
				args->auto_reset != 0, args->node_id,
				&args->event_id, &args->event_trigger_data,
				&args->event_page_offset,
				&args->event_slot_index);

	return err;

out_unlock:
	mutex_unlock(&p->mutex);
	return err;
}

static int kfd_ioctl_destroy_event(struct file *filp, struct kfd_process *p,
				void *data)
{
	struct kfd_ioctl_destroy_event_args *args = data;

	return kfd_event_destroy(p, args->event_id);
}

static int kfd_ioctl_set_event(struct file *filp, struct kfd_process *p,
				void *data)
{
	struct kfd_ioctl_set_event_args *args = data;

	return kfd_set_event(p, args->event_id);
}

static int kfd_ioctl_reset_event(struct file *filp, struct kfd_process *p,
				void *data)
{
	struct kfd_ioctl_reset_event_args *args = data;

	return kfd_reset_event(p, args->event_id);
}

static int kfd_ioctl_wait_events(struct file *filp, struct kfd_process *p,
				void *data)
{
	struct kfd_ioctl_wait_events_args *args = data;
	int err;

	err = kfd_wait_on_events(p, args->num_events,
			(void __user *)args->events_ptr,
			(args->wait_for_all != 0),
			args->timeout, &args->wait_result);

	return err;
}

static int kfd_ioctl_set_scratch_backing_va(struct file *filep,
					struct kfd_process *p, void *data)
{
	struct kfd_ioctl_set_scratch_backing_va_args *args = data;
	struct kfd_process_device *pdd;
	struct kfd_dev *dev;
	long err;

	dev = kfd_device_by_id(args->gpu_id);
	if (!dev)
		return -EINVAL;

	mutex_lock(&p->mutex);

	pdd = kfd_bind_process_to_device(dev, p);
	if (IS_ERR(pdd)) {
		err = PTR_ERR(pdd);
		goto bind_process_to_device_fail;
	}

	pdd->qpd.sh_hidden_private_base = args->va_addr;

	mutex_unlock(&p->mutex);

	if (dev->dqm->sched_policy == KFD_SCHED_POLICY_NO_HWS &&
	    pdd->qpd.vmid != 0)
		dev->kfd2kgd->set_scratch_backing_va(
			dev->kgd, args->va_addr, pdd->qpd.vmid);

	return 0;

bind_process_to_device_fail:
	mutex_unlock(&p->mutex);
	return err;
}

static int kfd_ioctl_get_tile_config(struct file *filep,
		struct kfd_process *p, void *data)
{
	struct kfd_ioctl_get_tile_config_args *args = data;
	struct kfd_dev *dev;
	struct tile_config config;
	int err = 0;

	dev = kfd_device_by_id(args->gpu_id);
	if (!dev)
		return -EINVAL;

	dev->kfd2kgd->get_tile_config(dev->kgd, &config);

	args->gb_addr_config = config.gb_addr_config;
	args->num_banks = config.num_banks;
	args->num_ranks = config.num_ranks;

	if (args->num_tile_configs > config.num_tile_configs)
		args->num_tile_configs = config.num_tile_configs;
	err = copy_to_user((void __user *)args->tile_config_ptr,
			config.tile_config_ptr,
			args->num_tile_configs * sizeof(uint32_t));
	if (err) {
		args->num_tile_configs = 0;
		return -EFAULT;
	}

	if (args->num_macro_tile_configs > config.num_macro_tile_configs)
		args->num_macro_tile_configs =
				config.num_macro_tile_configs;
	err = copy_to_user((void __user *)args->macro_tile_config_ptr,
			config.macro_tile_config_ptr,
			args->num_macro_tile_configs * sizeof(uint32_t));
	if (err) {
		args->num_macro_tile_configs = 0;
		return -EFAULT;
	}

	return 0;
}
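
/*
 * Illustrative usage (not part of the driver): before allocating GPU memory
 * through KFD, user space opens a DRM render node for the same GPU and
 * hands its file descriptor to the ioctl below, so the process-device data
 * can take over that file's GPU VM. The render node path is an example
 * only; error handling is omitted.
 *
 *	int drm_fd = open("/dev/dri/renderD128", O_RDWR | O_CLOEXEC);
 *	struct kfd_ioctl_acquire_vm_args acq = {
 *		.drm_fd = drm_fd,
 *		.gpu_id = gpu_id,
 *	};
 *
 *	ioctl(kfd_fd, AMDKFD_IOC_ACQUIRE_VM, &acq);
 */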

static int kfd_ioctl_acquire_vm(struct file *filep, struct kfd_process *p,
				void *data)
{
	struct kfd_ioctl_acquire_vm_args *args = data;
	struct kfd_process_device *pdd;
	struct kfd_dev *dev;
	struct file *drm_file;
	int ret;

	dev = kfd_device_by_id(args->gpu_id);
	if (!dev)
		return -EINVAL;

	drm_file = fget(args->drm_fd);
	if (!drm_file)
		return -EINVAL;

	mutex_lock(&p->mutex);

	pdd = kfd_get_process_device_data(dev, p);
	if (!pdd) {
		ret = -EINVAL;
		goto err_unlock;
	}

	if (pdd->drm_file) {
		ret = pdd->drm_file == drm_file ? 0 : -EBUSY;
		goto err_unlock;
	}

	ret = kfd_process_device_init_vm(pdd, drm_file);
	if (ret)
		goto err_unlock;
	/* On success, the PDD keeps the drm_file reference */
	mutex_unlock(&p->mutex);

	return 0;

err_unlock:
	mutex_unlock(&p->mutex);
	fput(drm_file);
	return ret;
}

static bool kfd_dev_is_large_bar(struct kfd_dev *dev)
{
	struct kfd_local_mem_info mem_info;

	if (debug_largebar) {
		pr_debug("Simulate large-bar allocation on non large-bar machine\n");
		return true;
	}

	if (dev->device_info->needs_iommu_device)
		return false;

	dev->kfd2kgd->get_local_mem_info(dev->kgd, &mem_info);
	if (mem_info.local_mem_size_private == 0 &&
			mem_info.local_mem_size_public > 0)
		return true;
	return false;
}
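
/*
 * Illustrative usage (not part of the driver): the typical buffer lifecycle
 * built from the ioctls below is allocate, map to one or more GPUs, use,
 * unmap, then free. The handle returned by the allocation encodes the GPU
 * ID and an IDR handle (see MAKE_HANDLE/GET_GPU_ID/GET_IDR_HANDLE) and is
 * what the map, unmap and free calls expect. Variable names are examples
 * only and error handling is omitted.
 *
 *	struct kfd_ioctl_alloc_memory_of_gpu_args alloc = {
 *		.va_addr = va, .size = size, .gpu_id = gpu_id,
 *		.flags = KFD_IOC_ALLOC_MEM_FLAGS_VRAM,
 *	};
 *	struct kfd_ioctl_map_memory_to_gpu_args map = {0};
 *	struct kfd_ioctl_free_memory_of_gpu_args free_args = {0};
 *
 *	ioctl(kfd_fd, AMDKFD_IOC_ALLOC_MEMORY_OF_GPU, &alloc);
 *	map.handle = alloc.handle;
 *	map.device_ids_array_ptr = (uintptr_t)&gpu_id;
 *	map.n_devices = 1;
 *	ioctl(kfd_fd, AMDKFD_IOC_MAP_MEMORY_TO_GPU, &map);
 *	...
 *	free_args.handle = alloc.handle;
 *	ioctl(kfd_fd, AMDKFD_IOC_FREE_MEMORY_OF_GPU, &free_args);
 */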

static int kfd_ioctl_alloc_memory_of_gpu(struct file *filep,
					struct kfd_process *p, void *data)
{
	struct kfd_ioctl_alloc_memory_of_gpu_args *args = data;
	struct kfd_process_device *pdd;
	void *mem;
	struct kfd_dev *dev;
	int idr_handle;
	long err;
	uint64_t offset = args->mmap_offset;
	uint32_t flags = args->flags;

	if (args->size == 0)
		return -EINVAL;

	dev = kfd_device_by_id(args->gpu_id);
	if (!dev)
		return -EINVAL;

	if ((flags & KFD_IOC_ALLOC_MEM_FLAGS_PUBLIC) &&
		(flags & KFD_IOC_ALLOC_MEM_FLAGS_VRAM) &&
		!kfd_dev_is_large_bar(dev)) {
		pr_err("Alloc host visible vram on small bar is not allowed\n");
		return -EINVAL;
	}

	mutex_lock(&p->mutex);

	pdd = kfd_bind_process_to_device(dev, p);
	if (IS_ERR(pdd)) {
		err = PTR_ERR(pdd);
		goto err_unlock;
	}

	err = dev->kfd2kgd->alloc_memory_of_gpu(
		dev->kgd, args->va_addr, args->size,
		pdd->vm, (struct kgd_mem **) &mem, &offset,
		flags);

	if (err)
		goto err_unlock;

	idr_handle = kfd_process_device_create_obj_handle(pdd, mem);
	if (idr_handle < 0) {
		err = -EFAULT;
		goto err_free;
	}

	mutex_unlock(&p->mutex);

	args->handle = MAKE_HANDLE(args->gpu_id, idr_handle);
	args->mmap_offset = offset;

	return 0;

err_free:
	dev->kfd2kgd->free_memory_of_gpu(dev->kgd, (struct kgd_mem *)mem);
err_unlock:
	mutex_unlock(&p->mutex);
	return err;
}

static int kfd_ioctl_free_memory_of_gpu(struct file *filep,
					struct kfd_process *p, void *data)
{
	struct kfd_ioctl_free_memory_of_gpu_args *args = data;
	struct kfd_process_device *pdd;
	void *mem;
	struct kfd_dev *dev;
	int ret;

	dev = kfd_device_by_id(GET_GPU_ID(args->handle));
	if (!dev)
		return -EINVAL;

	mutex_lock(&p->mutex);

	pdd = kfd_get_process_device_data(dev, p);
	if (!pdd) {
		pr_err("Process device data doesn't exist\n");
		ret = -EINVAL;
		goto err_unlock;
	}

	mem = kfd_process_device_translate_handle(
		pdd, GET_IDR_HANDLE(args->handle));
	if (!mem) {
		ret = -EINVAL;
		goto err_unlock;
	}

	ret = dev->kfd2kgd->free_memory_of_gpu(dev->kgd, (struct kgd_mem *)mem);

	/* If freeing the buffer failed, leave the handle in place for
	 * clean-up during process tear-down.
	 */
	if (!ret)
		kfd_process_device_remove_obj_handle(
			pdd, GET_IDR_HANDLE(args->handle));

err_unlock:
	mutex_unlock(&p->mutex);
	return ret;
}

static int kfd_ioctl_map_memory_to_gpu(struct file *filep,
					struct kfd_process *p, void *data)
{
	struct kfd_ioctl_map_memory_to_gpu_args *args = data;
	struct kfd_process_device *pdd, *peer_pdd;
	void *mem;
	struct kfd_dev *dev, *peer;
	long err = 0;
	int i;
	uint32_t *devices_arr = NULL;

	dev = kfd_device_by_id(GET_GPU_ID(args->handle));
	if (!dev)
		return -EINVAL;

	if (!args->n_devices) {
		pr_debug("Device IDs array empty\n");
		return -EINVAL;
	}
	if (args->n_success > args->n_devices) {
		pr_debug("n_success exceeds n_devices\n");
		return -EINVAL;
	}

	devices_arr = kmalloc_array(args->n_devices, sizeof(*devices_arr),
				    GFP_KERNEL);
	if (!devices_arr)
		return -ENOMEM;

	err = copy_from_user(devices_arr,
			     (void __user *)args->device_ids_array_ptr,
			     args->n_devices * sizeof(*devices_arr));
	if (err != 0) {
		err = -EFAULT;
		goto copy_from_user_failed;
	}

	mutex_lock(&p->mutex);

	pdd = kfd_bind_process_to_device(dev, p);
	if (IS_ERR(pdd)) {
		err = PTR_ERR(pdd);
		goto bind_process_to_device_failed;
	}

	mem = kfd_process_device_translate_handle(pdd,
			GET_IDR_HANDLE(args->handle));
	if (!mem) {
		err = -ENOMEM;
		goto get_mem_obj_from_handle_failed;
	}

	for (i = args->n_success; i < args->n_devices; i++) {
		peer = kfd_device_by_id(devices_arr[i]);
		if (!peer) {
			pr_debug("Getting device by id failed for 0x%x\n",
				 devices_arr[i]);
			err = -EINVAL;
			goto get_mem_obj_from_handle_failed;
		}

		peer_pdd = kfd_bind_process_to_device(peer, p);
		if (IS_ERR(peer_pdd)) {
			err = PTR_ERR(peer_pdd);
			goto get_mem_obj_from_handle_failed;
		}
		err = peer->kfd2kgd->map_memory_to_gpu(
			peer->kgd, (struct kgd_mem *)mem, peer_pdd->vm);
		if (err) {
			pr_err("Failed to map to gpu %d/%d\n",
			       i, args->n_devices);
			goto map_memory_to_gpu_failed;
		}
		args->n_success = i+1;
	}

	mutex_unlock(&p->mutex);

	err = dev->kfd2kgd->sync_memory(dev->kgd, (struct kgd_mem *) mem, true);
	if (err) {
		pr_debug("Sync memory failed, wait interrupted by user signal\n");
		goto sync_memory_failed;
	}

	/* Flush TLBs after waiting for the page table updates to complete */
	for (i = 0; i < args->n_devices; i++) {
		peer = kfd_device_by_id(devices_arr[i]);
		if (WARN_ON_ONCE(!peer))
			continue;
		peer_pdd = kfd_get_process_device_data(peer, p);
		if (WARN_ON_ONCE(!peer_pdd))
			continue;
		kfd_flush_tlb(peer_pdd);
	}

	kfree(devices_arr);

	return err;

bind_process_to_device_failed:
get_mem_obj_from_handle_failed:
map_memory_to_gpu_failed:
	mutex_unlock(&p->mutex);
copy_from_user_failed:
sync_memory_failed:
	kfree(devices_arr);

	return err;
}

static int kfd_ioctl_unmap_memory_from_gpu(struct file *filep,
					struct kfd_process *p, void *data)
{
	struct kfd_ioctl_unmap_memory_from_gpu_args *args = data;
	struct kfd_process_device *pdd, *peer_pdd;
	void *mem;
	struct kfd_dev *dev, *peer;
	long err = 0;
	uint32_t *devices_arr = NULL, i;

	dev = kfd_device_by_id(GET_GPU_ID(args->handle));
	if (!dev)
		return -EINVAL;

	if (!args->n_devices) {
		pr_debug("Device IDs array empty\n");
		return -EINVAL;
	}
	if (args->n_success > args->n_devices) {
		pr_debug("n_success exceeds n_devices\n");
		return -EINVAL;
	}

	devices_arr = kmalloc_array(args->n_devices, sizeof(*devices_arr),
				    GFP_KERNEL);
	if (!devices_arr)
		return -ENOMEM;

	err = copy_from_user(devices_arr,
			     (void __user *)args->device_ids_array_ptr,
			     args->n_devices * sizeof(*devices_arr));
	if (err != 0) {
		err = -EFAULT;
		goto copy_from_user_failed;
	}

	mutex_lock(&p->mutex);

	pdd = kfd_get_process_device_data(dev, p);
	if (!pdd) {
		err = -EINVAL;
		goto bind_process_to_device_failed;
	}

	mem = kfd_process_device_translate_handle(pdd,
			GET_IDR_HANDLE(args->handle));
	if (!mem) {
		err = -ENOMEM;
		goto get_mem_obj_from_handle_failed;
	}

	for (i = args->n_success; i < args->n_devices; i++) {
		peer = kfd_device_by_id(devices_arr[i]);
		if (!peer) {
			err = -EINVAL;
			goto get_mem_obj_from_handle_failed;
		}

		peer_pdd = kfd_get_process_device_data(peer, p);
		if (!peer_pdd) {
			err = -ENODEV;
			goto get_mem_obj_from_handle_failed;
		}
		err = dev->kfd2kgd->unmap_memory_to_gpu(
			peer->kgd, (struct kgd_mem *)mem, peer_pdd->vm);
		if (err) {
			pr_err("Failed to unmap from gpu %d/%d\n",
			       i, args->n_devices);
			goto unmap_memory_from_gpu_failed;
		}
		args->n_success = i+1;
	}
	kfree(devices_arr);

	mutex_unlock(&p->mutex);

	return 0;

bind_process_to_device_failed:
get_mem_obj_from_handle_failed:
unmap_memory_from_gpu_failed:
	mutex_unlock(&p->mutex);
copy_from_user_failed:
	kfree(devices_arr);
	return err;
}

#define AMDKFD_IOCTL_DEF(ioctl, _func, _flags) \
	[_IOC_NR(ioctl)] = {.cmd = ioctl, .func = _func, .flags = _flags, \
			    .cmd_drv = 0, .name = #ioctl}

/** Ioctl table */
static const struct amdkfd_ioctl_desc amdkfd_ioctls[] = {
	AMDKFD_IOCTL_DEF(AMDKFD_IOC_GET_VERSION,
			kfd_ioctl_get_version, 0),

	AMDKFD_IOCTL_DEF(AMDKFD_IOC_CREATE_QUEUE,
			kfd_ioctl_create_queue, 0),

	AMDKFD_IOCTL_DEF(AMDKFD_IOC_DESTROY_QUEUE,
			kfd_ioctl_destroy_queue, 0),

	AMDKFD_IOCTL_DEF(AMDKFD_IOC_SET_MEMORY_POLICY,
			kfd_ioctl_set_memory_policy, 0),

	AMDKFD_IOCTL_DEF(AMDKFD_IOC_GET_CLOCK_COUNTERS,
			kfd_ioctl_get_clock_counters, 0),

	AMDKFD_IOCTL_DEF(AMDKFD_IOC_GET_PROCESS_APERTURES,
			kfd_ioctl_get_process_apertures, 0),

	AMDKFD_IOCTL_DEF(AMDKFD_IOC_UPDATE_QUEUE,
			kfd_ioctl_update_queue, 0),

	AMDKFD_IOCTL_DEF(AMDKFD_IOC_CREATE_EVENT,
			kfd_ioctl_create_event, 0),

	AMDKFD_IOCTL_DEF(AMDKFD_IOC_DESTROY_EVENT,
			kfd_ioctl_destroy_event, 0),

	AMDKFD_IOCTL_DEF(AMDKFD_IOC_SET_EVENT,
			kfd_ioctl_set_event, 0),

	AMDKFD_IOCTL_DEF(AMDKFD_IOC_RESET_EVENT,
			kfd_ioctl_reset_event, 0),

	AMDKFD_IOCTL_DEF(AMDKFD_IOC_WAIT_EVENTS,
			kfd_ioctl_wait_events, 0),

	AMDKFD_IOCTL_DEF(AMDKFD_IOC_DBG_REGISTER,
			kfd_ioctl_dbg_register, 0),

	AMDKFD_IOCTL_DEF(AMDKFD_IOC_DBG_UNREGISTER,
			kfd_ioctl_dbg_unregister, 0),

	AMDKFD_IOCTL_DEF(AMDKFD_IOC_DBG_ADDRESS_WATCH,
			kfd_ioctl_dbg_address_watch, 0),

	AMDKFD_IOCTL_DEF(AMDKFD_IOC_DBG_WAVE_CONTROL,
			kfd_ioctl_dbg_wave_control, 0),

	AMDKFD_IOCTL_DEF(AMDKFD_IOC_SET_SCRATCH_BACKING_VA,
			kfd_ioctl_set_scratch_backing_va, 0),

	AMDKFD_IOCTL_DEF(AMDKFD_IOC_GET_TILE_CONFIG,
			kfd_ioctl_get_tile_config, 0),

	AMDKFD_IOCTL_DEF(AMDKFD_IOC_SET_TRAP_HANDLER,
			kfd_ioctl_set_trap_handler, 0),

	AMDKFD_IOCTL_DEF(AMDKFD_IOC_GET_PROCESS_APERTURES_NEW,
			kfd_ioctl_get_process_apertures_new, 0),

	AMDKFD_IOCTL_DEF(AMDKFD_IOC_ACQUIRE_VM,
			kfd_ioctl_acquire_vm, 0),

	AMDKFD_IOCTL_DEF(AMDKFD_IOC_ALLOC_MEMORY_OF_GPU,
			kfd_ioctl_alloc_memory_of_gpu, 0),

	AMDKFD_IOCTL_DEF(AMDKFD_IOC_FREE_MEMORY_OF_GPU,
			kfd_ioctl_free_memory_of_gpu, 0),

	AMDKFD_IOCTL_DEF(AMDKFD_IOC_MAP_MEMORY_TO_GPU,
			kfd_ioctl_map_memory_to_gpu, 0),

	AMDKFD_IOCTL_DEF(AMDKFD_IOC_UNMAP_MEMORY_FROM_GPU,
			kfd_ioctl_unmap_memory_from_gpu, 0),

};

#define AMDKFD_CORE_IOCTL_COUNT	ARRAY_SIZE(amdkfd_ioctls)

static long kfd_ioctl(struct file *filep, unsigned int cmd, unsigned long arg)
{
	struct kfd_process *process;
	amdkfd_ioctl_t *func;
	const struct amdkfd_ioctl_desc *ioctl = NULL;
	unsigned int nr = _IOC_NR(cmd);
	char stack_kdata[128];
	char *kdata = NULL;
	unsigned int usize, asize;
	int retcode = -EINVAL;

	if (nr >= AMDKFD_CORE_IOCTL_COUNT)
		goto err_i1;

	if ((nr >= AMDKFD_COMMAND_START) && (nr < AMDKFD_COMMAND_END)) {
		u32 amdkfd_size;

		ioctl = &amdkfd_ioctls[nr];

		amdkfd_size = _IOC_SIZE(ioctl->cmd);
		usize = asize = _IOC_SIZE(cmd);
		if (amdkfd_size > asize)
			asize = amdkfd_size;

		cmd = ioctl->cmd;
	} else
		goto err_i1;

	dev_dbg(kfd_device, "ioctl cmd 0x%x (#%d), arg 0x%lx\n", cmd, nr, arg);

	process = kfd_get_process(current);
	if (IS_ERR(process)) {
		dev_dbg(kfd_device, "no process\n");
		goto err_i1;
	}

	/* Do not trust userspace, use our own definition */
	func = ioctl->func;

	if (unlikely(!func)) {
		dev_dbg(kfd_device, "no function\n");
		retcode = -EINVAL;
		goto err_i1;
	}

	if (cmd & (IOC_IN | IOC_OUT)) {
		if (asize <= sizeof(stack_kdata)) {
			kdata = stack_kdata;
		} else {
			kdata = kmalloc(asize, GFP_KERNEL);
			if (!kdata) {
				retcode = -ENOMEM;
				goto err_i1;
			}
		}
		if (asize > usize)
			memset(kdata + usize, 0, asize - usize);
	}

	if (cmd & IOC_IN) {
		if (copy_from_user(kdata, (void __user *)arg, usize) != 0) {
			retcode = -EFAULT;
			goto err_i1;
		}
	} else if (cmd & IOC_OUT) {
		memset(kdata, 0, usize);
	}

	retcode = func(filep, process, kdata);

	if (cmd & IOC_OUT)
		if (copy_to_user((void __user *)arg, kdata, usize) != 0)
			retcode = -EFAULT;

err_i1:
	if (!ioctl)
		dev_dbg(kfd_device, "invalid ioctl: pid=%d, cmd=0x%02x, nr=0x%02x\n",
			task_pid_nr(current), cmd, nr);

	if (kdata != stack_kdata)
		kfree(kdata);

	if (retcode)
		dev_dbg(kfd_device, "ret = %d\n", retcode);

	return retcode;
}
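
/*
 * The mmap offset passed in by user space is an encoded value produced by
 * this driver (for example the doorbell_offset returned by
 * kfd_ioctl_create_queue()): the page offset carries a KFD_MMAP_TYPE_*
 * field plus an optional GPU ID, which are decoded below to select the
 * right backing (doorbells, the event page, or reserved memory). This note
 * only restates what the code below does.
 */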

static int kfd_mmap(struct file *filp, struct vm_area_struct *vma)
{
	struct kfd_process *process;
	struct kfd_dev *dev = NULL;
	unsigned long vm_pgoff;
	unsigned int gpu_id;

	process = kfd_get_process(current);
	if (IS_ERR(process))
		return PTR_ERR(process);

	vm_pgoff = vma->vm_pgoff;
	vma->vm_pgoff = KFD_MMAP_OFFSET_VALUE_GET(vm_pgoff);
	gpu_id = KFD_MMAP_GPU_ID_GET(vm_pgoff);
	if (gpu_id)
		dev = kfd_device_by_id(gpu_id);

	switch (vm_pgoff & KFD_MMAP_TYPE_MASK) {
	case KFD_MMAP_TYPE_DOORBELL:
		if (!dev)
			return -ENODEV;
		return kfd_doorbell_mmap(dev, process, vma);

	case KFD_MMAP_TYPE_EVENTS:
		return kfd_event_mmap(process, vma);

	case KFD_MMAP_TYPE_RESERVED_MEM:
		if (!dev)
			return -ENODEV;
		return kfd_reserved_mem_mmap(dev, process, vma);
	}

	return -EFAULT;
}