/*
 * Copyright 2014 Advanced Micro Devices, Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 */

#include <linux/device.h>
#include <linux/export.h>
#include <linux/err.h>
#include <linux/fs.h>
#include <linux/file.h>
#include <linux/sched.h>
#include <linux/slab.h>
#include <linux/uaccess.h>
#include <linux/compat.h>
#include <uapi/linux/kfd_ioctl.h>
#include <linux/time.h>
#include <linux/mm.h>
#include <linux/mman.h>
#include <asm/processor.h>
#include "kfd_priv.h"
#include "kfd_device_queue_manager.h"
#include "kfd_dbgmgr.h"

static long kfd_ioctl(struct file *, unsigned int, unsigned long);
static int kfd_open(struct inode *, struct file *);
static int kfd_mmap(struct file *, struct vm_area_struct *);

static const char kfd_dev_name[] = "kfd";

static const struct file_operations kfd_fops = {
	.owner = THIS_MODULE,
	.unlocked_ioctl = kfd_ioctl,
	.compat_ioctl = kfd_ioctl,
	.open = kfd_open,
	.mmap = kfd_mmap,
};

static int kfd_char_dev_major = -1;
static struct class *kfd_class;
struct device *kfd_device;

int kfd_chardev_init(void)
{
	int err = 0;

	kfd_char_dev_major = register_chrdev(0, kfd_dev_name, &kfd_fops);
	err = kfd_char_dev_major;
	if (err < 0)
		goto err_register_chrdev;

	kfd_class = class_create(THIS_MODULE, kfd_dev_name);
	err = PTR_ERR(kfd_class);
	if (IS_ERR(kfd_class))
		goto err_class_create;

	kfd_device = device_create(kfd_class, NULL,
				   MKDEV(kfd_char_dev_major, 0),
				   NULL, kfd_dev_name);
	err = PTR_ERR(kfd_device);
	if (IS_ERR(kfd_device))
		goto err_device_create;

	return 0;

err_device_create:
	class_destroy(kfd_class);
err_class_create:
	unregister_chrdev(kfd_char_dev_major, kfd_dev_name);
err_register_chrdev:
	return err;
}

void kfd_chardev_exit(void)
{
	device_destroy(kfd_class, MKDEV(kfd_char_dev_major, 0));
	class_destroy(kfd_class);
	unregister_chrdev(kfd_char_dev_major, kfd_dev_name);
}

struct device *kfd_chardev(void)
{
	return kfd_device;
}


static int kfd_open(struct inode *inode, struct file *filep)
{
	struct kfd_process *process;
	bool is_32bit_user_mode;

	if (iminor(inode) != 0)
		return -ENODEV;

	is_32bit_user_mode = in_compat_syscall();

	if (is_32bit_user_mode) {
		dev_warn(kfd_device,
			"Process %d (32-bit) failed to open /dev/kfd\n"
			"32-bit processes are not supported by amdkfd\n",
			current->pid);
		return -EPERM;
	}

	process = kfd_create_process(filep);
	if (IS_ERR(process))
		return PTR_ERR(process);

	dev_dbg(kfd_device, "process %d opened, compat mode (32 bit) - %d\n",
		process->pasid, process->is_32bit_user_mode);

	return 0;
}

static int kfd_ioctl_get_version(struct file *filep, struct kfd_process *p,
					void *data)
{
	struct kfd_ioctl_get_version_args *args = data;

	args->major_version = KFD_IOCTL_MAJOR_VERSION;
	args->minor_version = KFD_IOCTL_MINOR_VERSION;

	return 0;
}

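/*
 * Validate the queue arguments supplied by user space and copy them into a
 * queue_properties structure: percentage, priority, ring size (0 or a power
 * of two) and the user pointers (read/write pointers, optional EOP and
 * context save/restore buffers) are all checked before they are used.
 */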
static int set_queue_properties_from_user(struct queue_properties *q_properties,
				struct kfd_ioctl_create_queue_args *args)
{
	if (args->queue_percentage > KFD_MAX_QUEUE_PERCENTAGE) {
		pr_err("Queue percentage must be between 0 to KFD_MAX_QUEUE_PERCENTAGE\n");
		return -EINVAL;
	}

	if (args->queue_priority > KFD_MAX_QUEUE_PRIORITY) {
		pr_err("Queue priority must be between 0 to KFD_MAX_QUEUE_PRIORITY\n");
		return -EINVAL;
	}

	if ((args->ring_base_address) &&
		(!access_ok(VERIFY_WRITE,
			(const void __user *) args->ring_base_address,
			sizeof(uint64_t)))) {
		pr_err("Can't access ring base address\n");
		return -EFAULT;
	}

	if (!is_power_of_2(args->ring_size) && (args->ring_size != 0)) {
		pr_err("Ring size must be a power of 2 or 0\n");
		return -EINVAL;
	}

	if (!access_ok(VERIFY_WRITE,
			(const void __user *) args->read_pointer_address,
			sizeof(uint32_t))) {
		pr_err("Can't access read pointer\n");
		return -EFAULT;
	}

	if (!access_ok(VERIFY_WRITE,
			(const void __user *) args->write_pointer_address,
			sizeof(uint32_t))) {
		pr_err("Can't access write pointer\n");
		return -EFAULT;
	}

	if (args->eop_buffer_address &&
		!access_ok(VERIFY_WRITE,
			(const void __user *) args->eop_buffer_address,
			sizeof(uint32_t))) {
		pr_debug("Can't access eop buffer");
		return -EFAULT;
	}

	if (args->ctx_save_restore_address &&
		!access_ok(VERIFY_WRITE,
			(const void __user *) args->ctx_save_restore_address,
			sizeof(uint32_t))) {
		pr_debug("Can't access ctx save restore buffer");
		return -EFAULT;
	}

	q_properties->is_interop = false;
	q_properties->queue_percent = args->queue_percentage;
	q_properties->priority = args->queue_priority;
	q_properties->queue_address = args->ring_base_address;
	q_properties->queue_size = args->ring_size;
	q_properties->read_ptr = (uint32_t *) args->read_pointer_address;
	q_properties->write_ptr = (uint32_t *) args->write_pointer_address;
	q_properties->eop_ring_buffer_address = args->eop_buffer_address;
	q_properties->eop_ring_buffer_size = args->eop_buffer_size;
	q_properties->ctx_save_restore_area_address =
			args->ctx_save_restore_address;
	q_properties->ctx_save_restore_area_size = args->ctx_save_restore_size;
	q_properties->ctl_stack_size = args->ctl_stack_size;
	if (args->queue_type == KFD_IOC_QUEUE_TYPE_COMPUTE ||
		args->queue_type == KFD_IOC_QUEUE_TYPE_COMPUTE_AQL)
		q_properties->type = KFD_QUEUE_TYPE_COMPUTE;
	else if (args->queue_type == KFD_IOC_QUEUE_TYPE_SDMA)
		q_properties->type = KFD_QUEUE_TYPE_SDMA;
	else
		return -ENOTSUPP;

	if (args->queue_type == KFD_IOC_QUEUE_TYPE_COMPUTE_AQL)
		q_properties->format = KFD_QUEUE_FORMAT_AQL;
	else
		q_properties->format = KFD_QUEUE_FORMAT_PM4;

	pr_debug("Queue Percentage: %d, %d\n",
			q_properties->queue_percent, args->queue_percentage);

	pr_debug("Queue Priority: %d, %d\n",
			q_properties->priority, args->queue_priority);

	pr_debug("Queue Address: 0x%llX, 0x%llX\n",
			q_properties->queue_address, args->ring_base_address);

	pr_debug("Queue Size: 0x%llX, %u\n",
			q_properties->queue_size, args->ring_size);

	pr_debug("Queue r/w Pointers: %p, %p\n",
			q_properties->read_ptr,
			q_properties->write_ptr);

	pr_debug("Queue Format: %d\n", q_properties->format);

	pr_debug("Queue EOP: 0x%llX\n", q_properties->eop_ring_buffer_address);

	pr_debug("Queue CTX save area: 0x%llX\n",
			q_properties->ctx_save_restore_area_address);

	return 0;
}

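/*
 * Create a user-mode queue: validate the user arguments, look up the GPU,
 * bind the process to the device and hand the request to the process queue
 * manager.  The doorbell offset returned for mmap encodes the gpu_id.
 */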
static int kfd_ioctl_create_queue(struct file *filep, struct kfd_process *p,
					void *data)
{
	struct kfd_ioctl_create_queue_args *args = data;
	struct kfd_dev *dev;
	int err = 0;
	unsigned int queue_id;
	struct kfd_process_device *pdd;
	struct queue_properties q_properties;

	memset(&q_properties, 0, sizeof(struct queue_properties));

	pr_debug("Creating queue ioctl\n");

	err = set_queue_properties_from_user(&q_properties, args);
	if (err)
		return err;

	pr_debug("Looking for gpu id 0x%x\n", args->gpu_id);
	dev = kfd_device_by_id(args->gpu_id);
	if (!dev) {
		pr_debug("Could not find gpu id 0x%x\n", args->gpu_id);
		return -EINVAL;
	}

	mutex_lock(&p->mutex);

	pdd = kfd_bind_process_to_device(dev, p);
	if (IS_ERR(pdd)) {
		err = -ESRCH;
		goto err_bind_process;
	}

	pr_debug("Creating queue for PASID %d on gpu 0x%x\n",
			p->pasid,
			dev->id);

	err = pqm_create_queue(&p->pqm, dev, filep, &q_properties, &queue_id);
	if (err != 0)
		goto err_create_queue;

	args->queue_id = queue_id;


	/* Return gpu_id as doorbell offset for mmap usage */
	args->doorbell_offset = (KFD_MMAP_DOORBELL_MASK | args->gpu_id);
	args->doorbell_offset <<= PAGE_SHIFT;

	mutex_unlock(&p->mutex);

	pr_debug("Queue id %d was created successfully\n", args->queue_id);

	pr_debug("Ring buffer address == 0x%016llX\n",
			args->ring_base_address);

	pr_debug("Read ptr address == 0x%016llX\n",
			args->read_pointer_address);

	pr_debug("Write ptr address == 0x%016llX\n",
			args->write_pointer_address);

	return 0;

err_create_queue:
err_bind_process:
	mutex_unlock(&p->mutex);
	return err;
}

static int kfd_ioctl_destroy_queue(struct file *filp, struct kfd_process *p,
					void *data)
{
	int retval;
	struct kfd_ioctl_destroy_queue_args *args = data;

	pr_debug("Destroying queue id %d for pasid %d\n",
				args->queue_id,
				p->pasid);

	mutex_lock(&p->mutex);

	retval = pqm_destroy_queue(&p->pqm, args->queue_id);

	mutex_unlock(&p->mutex);
	return retval;
}

static int kfd_ioctl_update_queue(struct file *filp, struct kfd_process *p,
					void *data)
{
	int retval;
	struct kfd_ioctl_update_queue_args *args = data;
	struct queue_properties properties;

	if (args->queue_percentage > KFD_MAX_QUEUE_PERCENTAGE) {
		pr_err("Queue percentage must be between 0 to KFD_MAX_QUEUE_PERCENTAGE\n");
		return -EINVAL;
	}

	if (args->queue_priority > KFD_MAX_QUEUE_PRIORITY) {
		pr_err("Queue priority must be between 0 to KFD_MAX_QUEUE_PRIORITY\n");
		return -EINVAL;
	}

	if ((args->ring_base_address) &&
		(!access_ok(VERIFY_WRITE,
			(const void __user *) args->ring_base_address,
			sizeof(uint64_t)))) {
		pr_err("Can't access ring base address\n");
		return -EFAULT;
	}

	if (!is_power_of_2(args->ring_size) && (args->ring_size != 0)) {
		pr_err("Ring size must be a power of 2 or 0\n");
		return -EINVAL;
	}

	properties.queue_address = args->ring_base_address;
	properties.queue_size = args->ring_size;
	properties.queue_percent = args->queue_percentage;
	properties.priority = args->queue_priority;

	pr_debug("Updating queue id %d for pasid %d\n",
			args->queue_id, p->pasid);

	mutex_lock(&p->mutex);

	retval = pqm_update_queue(&p->pqm, args->queue_id, &properties);

	mutex_unlock(&p->mutex);

	return retval;
}

static int kfd_ioctl_set_memory_policy(struct file *filep,
					struct kfd_process *p, void *data)
{
	struct kfd_ioctl_set_memory_policy_args *args = data;
	struct kfd_dev *dev;
	int err = 0;
	struct kfd_process_device *pdd;
	enum cache_policy default_policy, alternate_policy;

	if (args->default_policy != KFD_IOC_CACHE_POLICY_COHERENT
	    && args->default_policy != KFD_IOC_CACHE_POLICY_NONCOHERENT) {
		return -EINVAL;
	}

	if (args->alternate_policy != KFD_IOC_CACHE_POLICY_COHERENT
	    && args->alternate_policy != KFD_IOC_CACHE_POLICY_NONCOHERENT) {
		return -EINVAL;
	}

	dev = kfd_device_by_id(args->gpu_id);
	if (!dev)
		return -EINVAL;

	mutex_lock(&p->mutex);

	pdd = kfd_bind_process_to_device(dev, p);
	if (IS_ERR(pdd)) {
		err = -ESRCH;
		goto out;
	}

	default_policy = (args->default_policy == KFD_IOC_CACHE_POLICY_COHERENT)
			 ? cache_policy_coherent : cache_policy_noncoherent;

	alternate_policy =
		(args->alternate_policy == KFD_IOC_CACHE_POLICY_COHERENT)
		? cache_policy_coherent : cache_policy_noncoherent;

	if (!dev->dqm->ops.set_cache_memory_policy(dev->dqm,
				&pdd->qpd,
				default_policy,
				alternate_policy,
				(void __user *)args->alternate_aperture_base,
				args->alternate_aperture_size))
		err = -EINVAL;

out:
	mutex_unlock(&p->mutex);

	return err;
}

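/*
 * Program the per-process trap handler on a device: tba_addr is the trap
 * handler base address and tma_addr the trap memory address.  Both are
 * applied to the process's per-device queue data through the device queue
 * manager.
 */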
static int kfd_ioctl_set_trap_handler(struct file *filep,
					struct kfd_process *p, void *data)
{
	struct kfd_ioctl_set_trap_handler_args *args = data;
	struct kfd_dev *dev;
	int err = 0;
	struct kfd_process_device *pdd;

	dev = kfd_device_by_id(args->gpu_id);
	if (dev == NULL)
		return -EINVAL;

	mutex_lock(&p->mutex);

	pdd = kfd_bind_process_to_device(dev, p);
	if (IS_ERR(pdd)) {
		err = -ESRCH;
		goto out;
	}

	if (dev->dqm->ops.set_trap_handler(dev->dqm,
					&pdd->qpd,
					args->tba_addr,
					args->tma_addr))
		err = -EINVAL;

out:
	mutex_unlock(&p->mutex);

	return err;
}

static int kfd_ioctl_dbg_register(struct file *filep,
				struct kfd_process *p, void *data)
{
	struct kfd_ioctl_dbg_register_args *args = data;
	struct kfd_dev *dev;
	struct kfd_dbgmgr *dbgmgr_ptr;
	struct kfd_process_device *pdd;
	bool create_ok;
	long status = 0;

	dev = kfd_device_by_id(args->gpu_id);
	if (!dev)
		return -EINVAL;

	if (dev->device_info->asic_family == CHIP_CARRIZO) {
		pr_debug("kfd_ioctl_dbg_register not supported on CZ\n");
		return -EINVAL;
	}

	mutex_lock(&p->mutex);
	mutex_lock(kfd_get_dbgmgr_mutex());

	/*
	 * Make sure that we have a pdd, in case this is the first queue
	 * created for this process.
	 */
	pdd = kfd_bind_process_to_device(dev, p);
	if (IS_ERR(pdd)) {
		status = PTR_ERR(pdd);
		goto out;
	}

	if (!dev->dbgmgr) {
		/* In case of a legal call, we have no dbgmgr yet */
		create_ok = kfd_dbgmgr_create(&dbgmgr_ptr, dev);
		if (create_ok) {
			status = kfd_dbgmgr_register(dbgmgr_ptr, p);
			if (status != 0)
				kfd_dbgmgr_destroy(dbgmgr_ptr);
			else
				dev->dbgmgr = dbgmgr_ptr;
		}
	} else {
		pr_debug("debugger already registered\n");
		status = -EINVAL;
	}

out:
	mutex_unlock(kfd_get_dbgmgr_mutex());
	mutex_unlock(&p->mutex);

	return status;
}

static int kfd_ioctl_dbg_unregister(struct file *filep,
				struct kfd_process *p, void *data)
{
	struct kfd_ioctl_dbg_unregister_args *args = data;
	struct kfd_dev *dev;
	long status;

	dev = kfd_device_by_id(args->gpu_id);
	if (!dev || !dev->dbgmgr)
		return -EINVAL;

	if (dev->device_info->asic_family == CHIP_CARRIZO) {
		pr_debug("kfd_ioctl_dbg_unregister not supported on CZ\n");
		return -EINVAL;
	}

	mutex_lock(kfd_get_dbgmgr_mutex());

	status = kfd_dbgmgr_unregister(dev->dbgmgr, p);
	if (!status) {
		kfd_dbgmgr_destroy(dev->dbgmgr);
		dev->dbgmgr = NULL;
	}

	mutex_unlock(kfd_get_dbgmgr_mutex());

	return status;
}

/*
 * Parse and generate a variable size data structure for address watch.
 * The total size of the buffer and the number of watch points are limited
 * in order to prevent kernel abuse (this has no bearing on the much smaller
 * HW limitation, which is enforced by the dbgdev module).
 * Note also that the watch addresses themselves are not "copied from user",
 * since they are set into the HW as user-mode values.
 */
static int kfd_ioctl_dbg_address_watch(struct file *filep,
					struct kfd_process *p, void *data)
{
	struct kfd_ioctl_dbg_address_watch_args *args = data;
	struct kfd_dev *dev;
	struct dbg_address_watch_info aw_info;
	unsigned char *args_buff;
	long status;
	void __user *cmd_from_user;
	uint64_t watch_mask_value = 0;
	unsigned int args_idx = 0;

	memset((void *) &aw_info, 0, sizeof(struct dbg_address_watch_info));

	dev = kfd_device_by_id(args->gpu_id);
	if (!dev)
		return -EINVAL;

	if (dev->device_info->asic_family == CHIP_CARRIZO) {
		pr_debug("kfd_ioctl_dbg_address_watch not supported on CZ\n");
		return -EINVAL;
	}

	cmd_from_user = (void __user *) args->content_ptr;

	/* Validate arguments */

	if ((args->buf_size_in_bytes > MAX_ALLOWED_AW_BUFF_SIZE) ||
		(args->buf_size_in_bytes <= sizeof(*args) + sizeof(int) * 2) ||
		(cmd_from_user == NULL))
		return -EINVAL;

	/* this is the actual buffer to work with */
	args_buff = memdup_user(cmd_from_user,
				args->buf_size_in_bytes - sizeof(*args));
	if (IS_ERR(args_buff))
		return PTR_ERR(args_buff);

	aw_info.process = p;

	aw_info.num_watch_points = *((uint32_t *)(&args_buff[args_idx]));
	args_idx += sizeof(aw_info.num_watch_points);

	aw_info.watch_mode = (enum HSA_DBG_WATCH_MODE *) &args_buff[args_idx];
	args_idx += sizeof(enum HSA_DBG_WATCH_MODE) * aw_info.num_watch_points;

	/*
	 * set watch address base pointer to point on the array base
	 * within args_buff
	 */
	aw_info.watch_address = (uint64_t *) &args_buff[args_idx];

	/* skip over the addresses buffer */
	args_idx += sizeof(aw_info.watch_address) * aw_info.num_watch_points;

	if (args_idx >= args->buf_size_in_bytes - sizeof(*args)) {
		status = -EINVAL;
		goto out;
	}

	watch_mask_value = (uint64_t) args_buff[args_idx];

	if (watch_mask_value > 0) {
		/*
		 * There is an array of masks.
		 * set watch mask base pointer to point on the array base
		 * within args_buff
		 */
		aw_info.watch_mask = (uint64_t *) &args_buff[args_idx];

		/* skip over the masks buffer */
		args_idx += sizeof(aw_info.watch_mask) *
				aw_info.num_watch_points;
	} else {
		/* just the NULL mask, set to NULL and skip over it */
		aw_info.watch_mask = NULL;
		args_idx += sizeof(aw_info.watch_mask);
	}

	if (args_idx >= args->buf_size_in_bytes - sizeof(*args)) {
		status = -EINVAL;
		goto out;
	}

	/* Currently HSA Event is not supported for DBG */
	aw_info.watch_event = NULL;

	mutex_lock(kfd_get_dbgmgr_mutex());

	status = kfd_dbgmgr_address_watch(dev->dbgmgr, &aw_info);

	mutex_unlock(kfd_get_dbgmgr_mutex());

out:
	kfree(args_buff);

	return status;
}

/* Parse and generate fixed size data structure for wave control */
static int kfd_ioctl_dbg_wave_control(struct file *filep,
					struct kfd_process *p, void *data)
{
	struct kfd_ioctl_dbg_wave_control_args *args = data;
	struct kfd_dev *dev;
	struct dbg_wave_control_info wac_info;
	unsigned char *args_buff;
	uint32_t computed_buff_size;
	long status;
	void __user *cmd_from_user;
	unsigned int args_idx = 0;

	memset((void *) &wac_info, 0, sizeof(struct dbg_wave_control_info));

	/* we use compact form, independent of the packing attribute value */
	computed_buff_size = sizeof(*args) +
				sizeof(wac_info.mode) +
				sizeof(wac_info.operand) +
				sizeof(wac_info.dbgWave_msg.DbgWaveMsg) +
				sizeof(wac_info.dbgWave_msg.MemoryVA) +
				sizeof(wac_info.trapId);

	dev = kfd_device_by_id(args->gpu_id);
	if (!dev)
		return -EINVAL;

	if (dev->device_info->asic_family == CHIP_CARRIZO) {
		pr_debug("kfd_ioctl_dbg_wave_control not supported on CZ\n");
		return -EINVAL;
	}

	/* input size must match the computed "compact" size */
	if (args->buf_size_in_bytes != computed_buff_size) {
		pr_debug("size mismatch, computed : actual %u : %u\n",
				args->buf_size_in_bytes, computed_buff_size);
		return -EINVAL;
	}

	cmd_from_user = (void __user *) args->content_ptr;

	if (cmd_from_user == NULL)
		return -EINVAL;

	/* copy the entire buffer from user */

	args_buff = memdup_user(cmd_from_user,
				args->buf_size_in_bytes - sizeof(*args));
	if (IS_ERR(args_buff))
		return PTR_ERR(args_buff);

	/* move ptr to the start of the "pay-load" area */
	wac_info.process = p;

	wac_info.operand = *((enum HSA_DBG_WAVEOP *)(&args_buff[args_idx]));
	args_idx += sizeof(wac_info.operand);

	wac_info.mode = *((enum HSA_DBG_WAVEMODE *)(&args_buff[args_idx]));
	args_idx += sizeof(wac_info.mode);

	wac_info.trapId = *((uint32_t *)(&args_buff[args_idx]));
	args_idx += sizeof(wac_info.trapId);

	wac_info.dbgWave_msg.DbgWaveMsg.WaveMsgInfoGen2.Value =
					*((uint32_t *)(&args_buff[args_idx]));
	wac_info.dbgWave_msg.MemoryVA = NULL;

	mutex_lock(kfd_get_dbgmgr_mutex());

	pr_debug("Calling dbg manager process %p, operand %u, mode %u, trapId %u, message %u\n",
			wac_info.process, wac_info.operand,
			wac_info.mode, wac_info.trapId,
			wac_info.dbgWave_msg.DbgWaveMsg.WaveMsgInfoGen2.Value);

	status = kfd_dbgmgr_wave_control(dev->dbgmgr, &wac_info);

	pr_debug("Returned status of dbg manager is %ld\n", status);

	mutex_unlock(kfd_get_dbgmgr_mutex());

	kfree(args_buff);

	return status;
}

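/*
 * Return a consistent set of counters to user space: the GPU clock counter
 * read through KGD, the raw monotonic CPU time and the boot-time clock.
 * The CPU values are reported in nanoseconds, hence the 1 GHz frequency.
 */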
static int kfd_ioctl_get_clock_counters(struct file *filep,
				struct kfd_process *p, void *data)
{
	struct kfd_ioctl_get_clock_counters_args *args = data;
	struct kfd_dev *dev;
	struct timespec64 time;

	dev = kfd_device_by_id(args->gpu_id);
	if (dev == NULL)
		return -EINVAL;

	/* Reading GPU clock counter from KGD */
	args->gpu_clock_counter =
		dev->kfd2kgd->get_gpu_clock_counter(dev->kgd);

	/* No access to rdtsc. Using raw monotonic time */
	getrawmonotonic64(&time);
	args->cpu_clock_counter = (uint64_t)timespec64_to_ns(&time);

	get_monotonic_boottime64(&time);
	args->system_clock_counter = (uint64_t)timespec64_to_ns(&time);

	/* Since the counter is in nano-seconds we use 1GHz frequency */
	args->system_clock_freq = 1000000000;

	return 0;
}


static int kfd_ioctl_get_process_apertures(struct file *filp,
				struct kfd_process *p, void *data)
{
	struct kfd_ioctl_get_process_apertures_args *args = data;
	struct kfd_process_device_apertures *pAperture;
	struct kfd_process_device *pdd;

	dev_dbg(kfd_device, "get apertures for PASID %d", p->pasid);

	args->num_of_nodes = 0;

	mutex_lock(&p->mutex);

	/* if the process-device list isn't empty */
	if (kfd_has_process_device_data(p)) {
		/* Run over all pdd of the process */
		pdd = kfd_get_first_process_device_data(p);
		do {
			pAperture =
				&args->process_apertures[args->num_of_nodes];
			pAperture->gpu_id = pdd->dev->id;
			pAperture->lds_base = pdd->lds_base;
			pAperture->lds_limit = pdd->lds_limit;
			pAperture->gpuvm_base = pdd->gpuvm_base;
			pAperture->gpuvm_limit = pdd->gpuvm_limit;
			pAperture->scratch_base = pdd->scratch_base;
			pAperture->scratch_limit = pdd->scratch_limit;

			dev_dbg(kfd_device,
				"node id %u\n", args->num_of_nodes);
			dev_dbg(kfd_device,
				"gpu id %u\n", pdd->dev->id);
			dev_dbg(kfd_device,
				"lds_base %llX\n", pdd->lds_base);
			dev_dbg(kfd_device,
				"lds_limit %llX\n", pdd->lds_limit);
			dev_dbg(kfd_device,
				"gpuvm_base %llX\n", pdd->gpuvm_base);
			dev_dbg(kfd_device,
				"gpuvm_limit %llX\n", pdd->gpuvm_limit);
			dev_dbg(kfd_device,
				"scratch_base %llX\n", pdd->scratch_base);
			dev_dbg(kfd_device,
				"scratch_limit %llX\n", pdd->scratch_limit);

			args->num_of_nodes++;

			pdd = kfd_get_next_process_device_data(p, pdd);
		} while (pdd && (args->num_of_nodes < NUM_OF_SUPPORTED_GPUS));
	}

	mutex_unlock(&p->mutex);

	return 0;
}

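/*
 * Two-call protocol: when called with num_of_nodes == 0, only the number of
 * available nodes is returned so that user space can size its buffer.  On
 * the second call the apertures of up to num_of_nodes devices are copied to
 * the user buffer at kfd_process_device_apertures_ptr.
 */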
static int kfd_ioctl_get_process_apertures_new(struct file *filp,
				struct kfd_process *p, void *data)
{
	struct kfd_ioctl_get_process_apertures_new_args *args = data;
	struct kfd_process_device_apertures *pa;
	struct kfd_process_device *pdd;
	uint32_t nodes = 0;
	int ret;

	dev_dbg(kfd_device, "get apertures for PASID %d", p->pasid);

	if (args->num_of_nodes == 0) {
		/* Return number of nodes, so that user space can allocate
		 * sufficient memory
		 */
		mutex_lock(&p->mutex);

		if (!kfd_has_process_device_data(p))
			goto out_unlock;

		/* Run over all pdd of the process */
		pdd = kfd_get_first_process_device_data(p);
		do {
			args->num_of_nodes++;
			pdd = kfd_get_next_process_device_data(p, pdd);
		} while (pdd);

		goto out_unlock;
	}

	/* Fill in process-aperture information for all available
	 * nodes, but not more than args->num_of_nodes as that is
	 * the amount of memory allocated by user
	 */
	pa = kzalloc((sizeof(struct kfd_process_device_apertures) *
			args->num_of_nodes), GFP_KERNEL);
	if (!pa)
		return -ENOMEM;

	mutex_lock(&p->mutex);

	if (!kfd_has_process_device_data(p)) {
		args->num_of_nodes = 0;
		kfree(pa);
		goto out_unlock;
	}

	/* Run over all pdd of the process */
	pdd = kfd_get_first_process_device_data(p);
	do {
		pa[nodes].gpu_id = pdd->dev->id;
		pa[nodes].lds_base = pdd->lds_base;
		pa[nodes].lds_limit = pdd->lds_limit;
		pa[nodes].gpuvm_base = pdd->gpuvm_base;
		pa[nodes].gpuvm_limit = pdd->gpuvm_limit;
		pa[nodes].scratch_base = pdd->scratch_base;
		pa[nodes].scratch_limit = pdd->scratch_limit;

		dev_dbg(kfd_device,
			"gpu id %u\n", pdd->dev->id);
		dev_dbg(kfd_device,
			"lds_base %llX\n", pdd->lds_base);
		dev_dbg(kfd_device,
			"lds_limit %llX\n", pdd->lds_limit);
		dev_dbg(kfd_device,
			"gpuvm_base %llX\n", pdd->gpuvm_base);
		dev_dbg(kfd_device,
			"gpuvm_limit %llX\n", pdd->gpuvm_limit);
		dev_dbg(kfd_device,
			"scratch_base %llX\n", pdd->scratch_base);
		dev_dbg(kfd_device,
			"scratch_limit %llX\n", pdd->scratch_limit);
		nodes++;

		pdd = kfd_get_next_process_device_data(p, pdd);
	} while (pdd && (nodes < args->num_of_nodes));
	mutex_unlock(&p->mutex);

	args->num_of_nodes = nodes;
	ret = copy_to_user(
			(void __user *)args->kfd_process_device_apertures_ptr,
			pa,
			(nodes * sizeof(struct kfd_process_device_apertures)));
	kfree(pa);
	return ret ? -EFAULT : 0;

out_unlock:
	mutex_unlock(&p->mutex);
	return 0;
}

static int kfd_ioctl_create_event(struct file *filp, struct kfd_process *p,
					void *data)
{
	struct kfd_ioctl_create_event_args *args = data;
	int err;

	/* For dGPUs the event page is allocated in user mode. The
	 * handle is passed to KFD with the first call to this IOCTL
	 * through the event_page_offset field.
	 */
	if (args->event_page_offset) {
		struct kfd_dev *kfd;
		struct kfd_process_device *pdd;
		void *mem, *kern_addr;
		uint64_t size;

		if (p->signal_page) {
			pr_err("Event page is already set\n");
			return -EINVAL;
		}

		kfd = kfd_device_by_id(GET_GPU_ID(args->event_page_offset));
		if (!kfd) {
			pr_err("Getting device by id failed in %s\n", __func__);
			return -EINVAL;
		}

		mutex_lock(&p->mutex);
		pdd = kfd_bind_process_to_device(kfd, p);
		if (IS_ERR(pdd)) {
			err = PTR_ERR(pdd);
			goto out_unlock;
		}

		mem = kfd_process_device_translate_handle(pdd,
				GET_IDR_HANDLE(args->event_page_offset));
		if (!mem) {
			pr_err("Can't find BO, offset is 0x%llx\n",
			       args->event_page_offset);
			err = -EINVAL;
			goto out_unlock;
		}
		mutex_unlock(&p->mutex);

		err = kfd->kfd2kgd->map_gtt_bo_to_kernel(kfd->kgd,
				mem, &kern_addr, &size);
		if (err) {
			pr_err("Failed to map event page to kernel\n");
			return err;
		}

		err = kfd_event_page_set(p, kern_addr, size);
		if (err) {
			pr_err("Failed to set event page\n");
			return err;
		}
	}

	err = kfd_event_create(filp, p, args->event_type,
				args->auto_reset != 0, args->node_id,
				&args->event_id, &args->event_trigger_data,
				&args->event_page_offset,
				&args->event_slot_index);

	return err;

out_unlock:
	mutex_unlock(&p->mutex);
	return err;
}

static int kfd_ioctl_destroy_event(struct file *filp, struct kfd_process *p,
				void *data)
{
	struct kfd_ioctl_destroy_event_args *args = data;

	return kfd_event_destroy(p, args->event_id);
}

static int kfd_ioctl_set_event(struct file *filp, struct kfd_process *p,
				void *data)
{
	struct kfd_ioctl_set_event_args *args = data;

	return kfd_set_event(p, args->event_id);
}

static int kfd_ioctl_reset_event(struct file *filp, struct kfd_process *p,
				void *data)
{
	struct kfd_ioctl_reset_event_args *args = data;

	return kfd_reset_event(p, args->event_id);
}

static int kfd_ioctl_wait_events(struct file *filp, struct kfd_process *p,
				void *data)
{
	struct kfd_ioctl_wait_events_args *args = data;
	int err;

	err = kfd_wait_on_events(p, args->num_events,
			(void __user *)args->events_ptr,
			(args->wait_for_all != 0),
			args->timeout, &args->wait_result);

	return err;
}

static int kfd_ioctl_set_scratch_backing_va(struct file *filep,
					struct kfd_process *p, void *data)
{
	struct kfd_ioctl_set_scratch_backing_va_args *args = data;
	struct kfd_process_device *pdd;
	struct kfd_dev *dev;
	long err;

	dev = kfd_device_by_id(args->gpu_id);
	if (!dev)
		return -EINVAL;

	mutex_lock(&p->mutex);

	pdd = kfd_bind_process_to_device(dev, p);
	if (IS_ERR(pdd)) {
		err = PTR_ERR(pdd);
		goto bind_process_to_device_fail;
	}

	pdd->qpd.sh_hidden_private_base = args->va_addr;

	mutex_unlock(&p->mutex);

	if (dev->dqm->sched_policy == KFD_SCHED_POLICY_NO_HWS &&
	    pdd->qpd.vmid != 0)
		dev->kfd2kgd->set_scratch_backing_va(
			dev->kgd, args->va_addr, pdd->qpd.vmid);

	return 0;

bind_process_to_device_fail:
	mutex_unlock(&p->mutex);
	return err;
}

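/*
 * Copy the ASIC tiling configuration (gb_addr_config, bank/rank counts and
 * the tile/macro-tile mode arrays) to user space.  The array sizes reported
 * by the caller are clamped to what the device actually provides.
 */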
static int kfd_ioctl_get_tile_config(struct file *filep,
		struct kfd_process *p, void *data)
{
	struct kfd_ioctl_get_tile_config_args *args = data;
	struct kfd_dev *dev;
	struct tile_config config;
	int err = 0;

	dev = kfd_device_by_id(args->gpu_id);
	if (!dev)
		return -EINVAL;

	dev->kfd2kgd->get_tile_config(dev->kgd, &config);

	args->gb_addr_config = config.gb_addr_config;
	args->num_banks = config.num_banks;
	args->num_ranks = config.num_ranks;

	if (args->num_tile_configs > config.num_tile_configs)
		args->num_tile_configs = config.num_tile_configs;
	err = copy_to_user((void __user *)args->tile_config_ptr,
			config.tile_config_ptr,
			args->num_tile_configs * sizeof(uint32_t));
	if (err) {
		args->num_tile_configs = 0;
		return -EFAULT;
	}

	if (args->num_macro_tile_configs > config.num_macro_tile_configs)
		args->num_macro_tile_configs =
				config.num_macro_tile_configs;
	err = copy_to_user((void __user *)args->macro_tile_config_ptr,
			config.macro_tile_config_ptr,
			args->num_macro_tile_configs * sizeof(uint32_t));
	if (err) {
		args->num_macro_tile_configs = 0;
		return -EFAULT;
	}

	return 0;
}

static int kfd_ioctl_acquire_vm(struct file *filep, struct kfd_process *p,
				void *data)
{
	struct kfd_ioctl_acquire_vm_args *args = data;
	struct kfd_process_device *pdd;
	struct kfd_dev *dev;
	struct file *drm_file;
	int ret;

	dev = kfd_device_by_id(args->gpu_id);
	if (!dev)
		return -EINVAL;

	drm_file = fget(args->drm_fd);
	if (!drm_file)
		return -EINVAL;

	mutex_lock(&p->mutex);

	pdd = kfd_get_process_device_data(dev, p);
	if (!pdd) {
		ret = -EINVAL;
		goto err_unlock;
	}

	if (pdd->drm_file) {
		ret = pdd->drm_file == drm_file ? 0 : -EBUSY;
		goto err_unlock;
	}

	ret = kfd_process_device_init_vm(pdd, drm_file);
	if (ret)
		goto err_unlock;
	/* On success, the PDD keeps the drm_file reference */
	mutex_unlock(&p->mutex);

	return 0;

err_unlock:
	mutex_unlock(&p->mutex);
	fput(drm_file);
	return ret;
}

bool kfd_dev_is_large_bar(struct kfd_dev *dev)
{
	struct kfd_local_mem_info mem_info;

	if (debug_largebar) {
		pr_debug("Simulate large-bar allocation on non large-bar machine\n");
		return true;
	}

	if (dev->device_info->needs_iommu_device)
		return false;

	dev->kfd2kgd->get_local_mem_info(dev->kgd, &mem_info);
	if (mem_info.local_mem_size_private == 0 &&
			mem_info.local_mem_size_public > 0)
		return true;
	return false;
}

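/*
 * Allocate a buffer object on a GPU.  Host-visible VRAM is only allowed on
 * large-BAR devices.  The returned handle packs the gpu_id together with an
 * IDR handle to the allocation, and mmap_offset is filled in so user space
 * can CPU-map the buffer where applicable.
 */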
static int kfd_ioctl_alloc_memory_of_gpu(struct file *filep,
					struct kfd_process *p, void *data)
{
	struct kfd_ioctl_alloc_memory_of_gpu_args *args = data;
	struct kfd_process_device *pdd;
	void *mem;
	struct kfd_dev *dev;
	int idr_handle;
	long err;
	uint64_t offset = args->mmap_offset;
	uint32_t flags = args->flags;

	if (args->size == 0)
		return -EINVAL;

	dev = kfd_device_by_id(args->gpu_id);
	if (!dev)
		return -EINVAL;

	if ((flags & KFD_IOC_ALLOC_MEM_FLAGS_PUBLIC) &&
		(flags & KFD_IOC_ALLOC_MEM_FLAGS_VRAM) &&
		!kfd_dev_is_large_bar(dev)) {
		pr_err("Alloc host visible vram on small bar is not allowed\n");
		return -EINVAL;
	}

	mutex_lock(&p->mutex);

	pdd = kfd_bind_process_to_device(dev, p);
	if (IS_ERR(pdd)) {
		err = PTR_ERR(pdd);
		goto err_unlock;
	}

	err = dev->kfd2kgd->alloc_memory_of_gpu(
		dev->kgd, args->va_addr, args->size,
		pdd->vm, (struct kgd_mem **) &mem, &offset,
		flags);

	if (err)
		goto err_unlock;

	idr_handle = kfd_process_device_create_obj_handle(pdd, mem);
	if (idr_handle < 0) {
		err = -EFAULT;
		goto err_free;
	}

	mutex_unlock(&p->mutex);

	args->handle = MAKE_HANDLE(args->gpu_id, idr_handle);
	args->mmap_offset = offset;

	return 0;

err_free:
	dev->kfd2kgd->free_memory_of_gpu(dev->kgd, (struct kgd_mem *)mem);
err_unlock:
	mutex_unlock(&p->mutex);
	return err;
}

static int kfd_ioctl_free_memory_of_gpu(struct file *filep,
					struct kfd_process *p, void *data)
{
	struct kfd_ioctl_free_memory_of_gpu_args *args = data;
	struct kfd_process_device *pdd;
	void *mem;
	struct kfd_dev *dev;
	int ret;

	dev = kfd_device_by_id(GET_GPU_ID(args->handle));
	if (!dev)
		return -EINVAL;

	mutex_lock(&p->mutex);

	pdd = kfd_get_process_device_data(dev, p);
	if (!pdd) {
		pr_err("Process device data doesn't exist\n");
		ret = -EINVAL;
		goto err_unlock;
	}

	mem = kfd_process_device_translate_handle(
		pdd, GET_IDR_HANDLE(args->handle));
	if (!mem) {
		ret = -EINVAL;
		goto err_unlock;
	}

	ret = dev->kfd2kgd->free_memory_of_gpu(dev->kgd, (struct kgd_mem *)mem);

	/* If freeing the buffer failed, leave the handle in place for
	 * clean-up during process tear-down.
	 */
	if (!ret)
		kfd_process_device_remove_obj_handle(
			pdd, GET_IDR_HANDLE(args->handle));

err_unlock:
	mutex_unlock(&p->mutex);
	return ret;
}

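/*
 * Map an existing allocation into the GPU virtual address space of one or
 * more devices.  args->n_success tracks how many devices have been mapped,
 * so a partially failed call can be retried without re-mapping earlier
 * devices.  After all mappings are queued, the call waits for the page-table
 * updates to complete and then flushes the TLBs of the affected devices.
 */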
static int kfd_ioctl_map_memory_to_gpu(struct file *filep,
					struct kfd_process *p, void *data)
{
	struct kfd_ioctl_map_memory_to_gpu_args *args = data;
	struct kfd_process_device *pdd, *peer_pdd;
	void *mem;
	struct kfd_dev *dev, *peer;
	long err = 0;
	int i;
	uint32_t *devices_arr = NULL;

	dev = kfd_device_by_id(GET_GPU_ID(args->handle));
	if (!dev)
		return -EINVAL;

	if (!args->n_devices) {
		pr_debug("Device IDs array empty\n");
		return -EINVAL;
	}
	if (args->n_success > args->n_devices) {
		pr_debug("n_success exceeds n_devices\n");
		return -EINVAL;
	}

	devices_arr = kmalloc(args->n_devices * sizeof(*devices_arr),
			      GFP_KERNEL);
	if (!devices_arr)
		return -ENOMEM;

	err = copy_from_user(devices_arr,
			     (void __user *)args->device_ids_array_ptr,
			     args->n_devices * sizeof(*devices_arr));
	if (err != 0) {
		err = -EFAULT;
		goto copy_from_user_failed;
	}

	mutex_lock(&p->mutex);

	pdd = kfd_bind_process_to_device(dev, p);
	if (IS_ERR(pdd)) {
		err = PTR_ERR(pdd);
		goto bind_process_to_device_failed;
	}

	mem = kfd_process_device_translate_handle(pdd,
			GET_IDR_HANDLE(args->handle));
	if (!mem) {
		err = -ENOMEM;
		goto get_mem_obj_from_handle_failed;
	}

	for (i = args->n_success; i < args->n_devices; i++) {
		peer = kfd_device_by_id(devices_arr[i]);
		if (!peer) {
			pr_debug("Getting device by id failed for 0x%x\n",
				 devices_arr[i]);
			err = -EINVAL;
			goto get_mem_obj_from_handle_failed;
		}

		peer_pdd = kfd_bind_process_to_device(peer, p);
		if (IS_ERR(peer_pdd)) {
			err = PTR_ERR(peer_pdd);
			goto get_mem_obj_from_handle_failed;
		}
		err = peer->kfd2kgd->map_memory_to_gpu(
			peer->kgd, (struct kgd_mem *)mem, peer_pdd->vm);
		if (err) {
			pr_err("Failed to map to gpu %d/%d\n",
			       i, args->n_devices);
			goto map_memory_to_gpu_failed;
		}
		args->n_success = i+1;
	}

	mutex_unlock(&p->mutex);

	err = dev->kfd2kgd->sync_memory(dev->kgd, (struct kgd_mem *) mem, true);
	if (err) {
		pr_debug("Sync memory failed, wait interrupted by user signal\n");
		goto sync_memory_failed;
	}

	/* Flush TLBs after waiting for the page table updates to complete */
	for (i = 0; i < args->n_devices; i++) {
		peer = kfd_device_by_id(devices_arr[i]);
		if (WARN_ON_ONCE(!peer))
			continue;
		peer_pdd = kfd_get_process_device_data(peer, p);
		if (WARN_ON_ONCE(!peer_pdd))
			continue;
		kfd_flush_tlb(peer_pdd);
	}

	kfree(devices_arr);

	return err;

bind_process_to_device_failed:
get_mem_obj_from_handle_failed:
map_memory_to_gpu_failed:
	mutex_unlock(&p->mutex);
copy_from_user_failed:
sync_memory_failed:
	kfree(devices_arr);

	return err;
}

static int kfd_ioctl_unmap_memory_from_gpu(struct file *filep,
					struct kfd_process *p, void *data)
{
	struct kfd_ioctl_unmap_memory_from_gpu_args *args = data;
	struct kfd_process_device *pdd, *peer_pdd;
	void *mem;
	struct kfd_dev *dev, *peer;
	long err = 0;
	uint32_t *devices_arr = NULL, i;

	dev = kfd_device_by_id(GET_GPU_ID(args->handle));
	if (!dev)
		return -EINVAL;

	if (!args->n_devices) {
		pr_debug("Device IDs array empty\n");
		return -EINVAL;
	}
	if (args->n_success > args->n_devices) {
		pr_debug("n_success exceeds n_devices\n");
		return -EINVAL;
	}

	devices_arr = kmalloc(args->n_devices * sizeof(*devices_arr),
			      GFP_KERNEL);
	if (!devices_arr)
		return -ENOMEM;

	err = copy_from_user(devices_arr,
			     (void __user *)args->device_ids_array_ptr,
			     args->n_devices * sizeof(*devices_arr));
	if (err != 0) {
		err = -EFAULT;
		goto copy_from_user_failed;
	}

	mutex_lock(&p->mutex);

	pdd = kfd_get_process_device_data(dev, p);
	if (!pdd) {
		err = -EINVAL;
		goto bind_process_to_device_failed;
	}

	mem = kfd_process_device_translate_handle(pdd,
			GET_IDR_HANDLE(args->handle));
	if (!mem) {
		err = -ENOMEM;
		goto get_mem_obj_from_handle_failed;
	}

	for (i = args->n_success; i < args->n_devices; i++) {
		peer = kfd_device_by_id(devices_arr[i]);
		if (!peer) {
			err = -EINVAL;
			goto get_mem_obj_from_handle_failed;
		}

		peer_pdd = kfd_get_process_device_data(peer, p);
		if (!peer_pdd) {
			err = -ENODEV;
			goto get_mem_obj_from_handle_failed;
		}
		err = dev->kfd2kgd->unmap_memory_to_gpu(
			peer->kgd, (struct kgd_mem *)mem, peer_pdd->vm);
		if (err) {
			pr_err("Failed to unmap from gpu %d/%d\n",
			       i, args->n_devices);
			goto unmap_memory_from_gpu_failed;
		}
		args->n_success = i+1;
	}
	kfree(devices_arr);

	mutex_unlock(&p->mutex);

	return 0;

bind_process_to_device_failed:
get_mem_obj_from_handle_failed:
unmap_memory_from_gpu_failed:
	mutex_unlock(&p->mutex);
copy_from_user_failed:
	kfree(devices_arr);
	return err;
}

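/*
 * amdkfd ioctl dispatch table.  AMDKFD_IOCTL_DEF places each descriptor at
 * the array index given by the ioctl number, so kfd_ioctl() can look up the
 * handler directly with _IOC_NR(cmd).
 */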
#define AMDKFD_IOCTL_DEF(ioctl, _func, _flags) \
	[_IOC_NR(ioctl)] = {.cmd = ioctl, .func = _func, .flags = _flags, \
			    .cmd_drv = 0, .name = #ioctl}

/** Ioctl table */
static const struct amdkfd_ioctl_desc amdkfd_ioctls[] = {
	AMDKFD_IOCTL_DEF(AMDKFD_IOC_GET_VERSION,
			kfd_ioctl_get_version, 0),

	AMDKFD_IOCTL_DEF(AMDKFD_IOC_CREATE_QUEUE,
			kfd_ioctl_create_queue, 0),

	AMDKFD_IOCTL_DEF(AMDKFD_IOC_DESTROY_QUEUE,
			kfd_ioctl_destroy_queue, 0),

	AMDKFD_IOCTL_DEF(AMDKFD_IOC_SET_MEMORY_POLICY,
			kfd_ioctl_set_memory_policy, 0),

	AMDKFD_IOCTL_DEF(AMDKFD_IOC_GET_CLOCK_COUNTERS,
			kfd_ioctl_get_clock_counters, 0),

	AMDKFD_IOCTL_DEF(AMDKFD_IOC_GET_PROCESS_APERTURES,
			kfd_ioctl_get_process_apertures, 0),

	AMDKFD_IOCTL_DEF(AMDKFD_IOC_UPDATE_QUEUE,
			kfd_ioctl_update_queue, 0),

	AMDKFD_IOCTL_DEF(AMDKFD_IOC_CREATE_EVENT,
			kfd_ioctl_create_event, 0),

	AMDKFD_IOCTL_DEF(AMDKFD_IOC_DESTROY_EVENT,
			kfd_ioctl_destroy_event, 0),

	AMDKFD_IOCTL_DEF(AMDKFD_IOC_SET_EVENT,
			kfd_ioctl_set_event, 0),

	AMDKFD_IOCTL_DEF(AMDKFD_IOC_RESET_EVENT,
			kfd_ioctl_reset_event, 0),

	AMDKFD_IOCTL_DEF(AMDKFD_IOC_WAIT_EVENTS,
			kfd_ioctl_wait_events, 0),

	AMDKFD_IOCTL_DEF(AMDKFD_IOC_DBG_REGISTER,
			kfd_ioctl_dbg_register, 0),

	AMDKFD_IOCTL_DEF(AMDKFD_IOC_DBG_UNREGISTER,
			kfd_ioctl_dbg_unregister, 0),

	AMDKFD_IOCTL_DEF(AMDKFD_IOC_DBG_ADDRESS_WATCH,
			kfd_ioctl_dbg_address_watch, 0),

	AMDKFD_IOCTL_DEF(AMDKFD_IOC_DBG_WAVE_CONTROL,
			kfd_ioctl_dbg_wave_control, 0),

	AMDKFD_IOCTL_DEF(AMDKFD_IOC_SET_SCRATCH_BACKING_VA,
			kfd_ioctl_set_scratch_backing_va, 0),

	AMDKFD_IOCTL_DEF(AMDKFD_IOC_GET_TILE_CONFIG,
			kfd_ioctl_get_tile_config, 0),

	AMDKFD_IOCTL_DEF(AMDKFD_IOC_SET_TRAP_HANDLER,
			kfd_ioctl_set_trap_handler, 0),

	AMDKFD_IOCTL_DEF(AMDKFD_IOC_GET_PROCESS_APERTURES_NEW,
			kfd_ioctl_get_process_apertures_new, 0),

	AMDKFD_IOCTL_DEF(AMDKFD_IOC_ACQUIRE_VM,
			kfd_ioctl_acquire_vm, 0),

	AMDKFD_IOCTL_DEF(AMDKFD_IOC_ALLOC_MEMORY_OF_GPU,
			kfd_ioctl_alloc_memory_of_gpu, 0),

	AMDKFD_IOCTL_DEF(AMDKFD_IOC_FREE_MEMORY_OF_GPU,
			kfd_ioctl_free_memory_of_gpu, 0),

	AMDKFD_IOCTL_DEF(AMDKFD_IOC_MAP_MEMORY_TO_GPU,
			kfd_ioctl_map_memory_to_gpu, 0),

	AMDKFD_IOCTL_DEF(AMDKFD_IOC_UNMAP_MEMORY_FROM_GPU,
			kfd_ioctl_unmap_memory_from_gpu, 0),

};

#define AMDKFD_CORE_IOCTL_COUNT	ARRAY_SIZE(amdkfd_ioctls)

static long kfd_ioctl(struct file *filep, unsigned int cmd, unsigned long arg)
{
	struct kfd_process *process;
	amdkfd_ioctl_t *func;
	const struct amdkfd_ioctl_desc *ioctl = NULL;
	unsigned int nr = _IOC_NR(cmd);
	char stack_kdata[128];
	char *kdata = NULL;
	unsigned int usize, asize;
	int retcode = -EINVAL;

	if (nr >= AMDKFD_CORE_IOCTL_COUNT)
		goto err_i1;

	if ((nr >= AMDKFD_COMMAND_START) && (nr < AMDKFD_COMMAND_END)) {
		u32 amdkfd_size;

		ioctl = &amdkfd_ioctls[nr];

		amdkfd_size = _IOC_SIZE(ioctl->cmd);
		usize = asize = _IOC_SIZE(cmd);
		if (amdkfd_size > asize)
			asize = amdkfd_size;

		cmd = ioctl->cmd;
	} else
		goto err_i1;

	dev_dbg(kfd_device, "ioctl cmd 0x%x (#%d), arg 0x%lx\n", cmd, nr, arg);

	process = kfd_get_process(current);
	if (IS_ERR(process)) {
		dev_dbg(kfd_device, "no process\n");
		goto err_i1;
	}

	/* Do not trust userspace, use our own definition */
	func = ioctl->func;

	if (unlikely(!func)) {
		dev_dbg(kfd_device, "no function\n");
		retcode = -EINVAL;
		goto err_i1;
	}

	if (cmd & (IOC_IN | IOC_OUT)) {
		if (asize <= sizeof(stack_kdata)) {
			kdata = stack_kdata;
		} else {
			kdata = kmalloc(asize, GFP_KERNEL);
			if (!kdata) {
				retcode = -ENOMEM;
				goto err_i1;
			}
		}
		if (asize > usize)
			memset(kdata + usize, 0, asize - usize);
	}

	if (cmd & IOC_IN) {
		if (copy_from_user(kdata, (void __user *)arg, usize) != 0) {
			retcode = -EFAULT;
			goto err_i1;
		}
	} else if (cmd & IOC_OUT) {
		memset(kdata, 0, usize);
	}

	retcode = func(filep, process, kdata);

	if (cmd & IOC_OUT)
		if (copy_to_user((void __user *)arg, kdata, usize) != 0)
			retcode = -EFAULT;

err_i1:
	if (!ioctl)
		dev_dbg(kfd_device, "invalid ioctl: pid=%d, cmd=0x%02x, nr=0x%02x\n",
			task_pid_nr(current), cmd, nr);

	if (kdata != stack_kdata)
		kfree(kdata);

	if (retcode)
		dev_dbg(kfd_device, "ret = %d\n", retcode);

	return retcode;
}

static int kfd_mmap(struct file *filp, struct vm_area_struct *vma)
{
	struct kfd_process *process;

	process = kfd_get_process(current);
	if (IS_ERR(process))
		return PTR_ERR(process);

	if ((vma->vm_pgoff & KFD_MMAP_DOORBELL_MASK) ==
			KFD_MMAP_DOORBELL_MASK) {
		vma->vm_pgoff = vma->vm_pgoff ^ KFD_MMAP_DOORBELL_MASK;
		return kfd_doorbell_mmap(process, vma);
	} else if ((vma->vm_pgoff & KFD_MMAP_EVENTS_MASK) ==
			KFD_MMAP_EVENTS_MASK) {
		vma->vm_pgoff = vma->vm_pgoff ^ KFD_MMAP_EVENTS_MASK;
		return kfd_event_mmap(process, vma);
	} else if ((vma->vm_pgoff & KFD_MMAP_RESERVED_MEM_MASK) ==
			KFD_MMAP_RESERVED_MEM_MASK) {
		vma->vm_pgoff = vma->vm_pgoff ^ KFD_MMAP_RESERVED_MEM_MASK;
		return kfd_reserved_mem_mmap(process, vma);
	}

	return -EFAULT;
}