1 /* 2 * Copyright 2014 Advanced Micro Devices, Inc. 3 * 4 * Permission is hereby granted, free of charge, to any person obtaining a 5 * copy of this software and associated documentation files (the "Software"), 6 * to deal in the Software without restriction, including without limitation 7 * the rights to use, copy, modify, merge, publish, distribute, sublicense, 8 * and/or sell copies of the Software, and to permit persons to whom the 9 * Software is furnished to do so, subject to the following conditions: 10 * 11 * The above copyright notice and this permission notice shall be included in 12 * all copies or substantial portions of the Software. 13 * 14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 15 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 17 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR 18 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, 19 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR 20 * OTHER DEALINGS IN THE SOFTWARE. 21 */ 22 23 #ifndef KFD_PRIV_H_INCLUDED 24 #define KFD_PRIV_H_INCLUDED 25 26 #include <linux/hashtable.h> 27 #include <linux/mmu_notifier.h> 28 #include <linux/mutex.h> 29 #include <linux/types.h> 30 #include <linux/atomic.h> 31 #include <linux/workqueue.h> 32 #include <linux/spinlock.h> 33 #include <linux/kfd_ioctl.h> 34 #include <linux/idr.h> 35 #include <linux/kfifo.h> 36 #include <linux/seq_file.h> 37 #include <linux/kref.h> 38 #include <kgd_kfd_interface.h> 39 40 #include "amd_shared.h" 41 42 #define KFD_SYSFS_FILE_MODE 0444 43 44 #define KFD_MMAP_DOORBELL_MASK 0x8000000000000ull 45 #define KFD_MMAP_EVENTS_MASK 0x4000000000000ull 46 #define KFD_MMAP_RESERVED_MEM_MASK 0x2000000000000ull 47 48 /* 49 * When working with cp scheduler we should assign the HIQ manually or via 50 * the radeon driver to a fixed hqd slot, here are the fixed HIQ hqd slot 51 * definitions for Kaveri. In Kaveri only the first ME queues participates 52 * in the cp scheduling taking that in mind we set the HIQ slot in the 53 * second ME. 54 */ 55 #define KFD_CIK_HIQ_PIPE 4 56 #define KFD_CIK_HIQ_QUEUE 0 57 58 /* GPU ID hash width in bits */ 59 #define KFD_GPU_ID_HASH_WIDTH 16 60 61 /* Macro for allocating structures */ 62 #define kfd_alloc_struct(ptr_to_struct) \ 63 ((typeof(ptr_to_struct)) kzalloc(sizeof(*ptr_to_struct), GFP_KERNEL)) 64 65 #define KFD_MAX_NUM_OF_PROCESSES 512 66 #define KFD_MAX_NUM_OF_QUEUES_PER_PROCESS 1024 67 68 /* 69 * Size of the per-process TBA+TMA buffer: 2 pages 70 * 71 * The first page is the TBA used for the CWSR ISA code. The second 72 * page is used as TMA for daisy changing a user-mode trap handler. 73 */ 74 #define KFD_CWSR_TBA_TMA_SIZE (PAGE_SIZE * 2) 75 #define KFD_CWSR_TMA_OFFSET PAGE_SIZE 76 77 /* 78 * Kernel module parameter to specify maximum number of supported queues per 79 * device 80 */ 81 extern int max_num_of_queues_per_device; 82 83 #define KFD_MAX_NUM_OF_QUEUES_PER_DEVICE_DEFAULT 4096 84 #define KFD_MAX_NUM_OF_QUEUES_PER_DEVICE \ 85 (KFD_MAX_NUM_OF_PROCESSES * \ 86 KFD_MAX_NUM_OF_QUEUES_PER_PROCESS) 87 88 #define KFD_KERNEL_QUEUE_SIZE 2048 89 90 /* Kernel module parameter to specify the scheduling policy */ 91 extern int sched_policy; 92 93 /* 94 * Kernel module parameter to specify the maximum process 95 * number per HW scheduler 96 */ 97 extern int hws_max_conc_proc; 98 99 extern int cwsr_enable; 100 101 /* 102 * Kernel module parameter to specify whether to send sigterm to HSA process on 103 * unhandled exception 104 */ 105 extern int send_sigterm; 106 107 /* 108 * This kernel module is used to simulate large bar machine on non-large bar 109 * enabled machines. 110 */ 111 extern int debug_largebar; 112 113 /* 114 * Ignore CRAT table during KFD initialization, can be used to work around 115 * broken CRAT tables on some AMD systems 116 */ 117 extern int ignore_crat; 118 119 /** 120 * enum kfd_sched_policy 121 * 122 * @KFD_SCHED_POLICY_HWS: H/W scheduling policy known as command processor (cp) 123 * scheduling. In this scheduling mode we're using the firmware code to 124 * schedule the user mode queues and kernel queues such as HIQ and DIQ. 125 * the HIQ queue is used as a special queue that dispatches the configuration 126 * to the cp and the user mode queues list that are currently running. 127 * the DIQ queue is a debugging queue that dispatches debugging commands to the 128 * firmware. 129 * in this scheduling mode user mode queues over subscription feature is 130 * enabled. 131 * 132 * @KFD_SCHED_POLICY_HWS_NO_OVERSUBSCRIPTION: The same as above but the over 133 * subscription feature disabled. 134 * 135 * @KFD_SCHED_POLICY_NO_HWS: no H/W scheduling policy is a mode which directly 136 * set the command processor registers and sets the queues "manually". This 137 * mode is used *ONLY* for debugging proposes. 138 * 139 */ 140 enum kfd_sched_policy { 141 KFD_SCHED_POLICY_HWS = 0, 142 KFD_SCHED_POLICY_HWS_NO_OVERSUBSCRIPTION, 143 KFD_SCHED_POLICY_NO_HWS 144 }; 145 146 enum cache_policy { 147 cache_policy_coherent, 148 cache_policy_noncoherent 149 }; 150 151 struct kfd_event_interrupt_class { 152 bool (*interrupt_isr)(struct kfd_dev *dev, 153 const uint32_t *ih_ring_entry); 154 void (*interrupt_wq)(struct kfd_dev *dev, 155 const uint32_t *ih_ring_entry); 156 }; 157 158 struct kfd_device_info { 159 enum amd_asic_type asic_family; 160 const struct kfd_event_interrupt_class *event_interrupt_class; 161 unsigned int max_pasid_bits; 162 unsigned int max_no_of_hqd; 163 size_t ih_ring_entry_size; 164 uint8_t num_of_watch_points; 165 uint16_t mqd_size_aligned; 166 bool supports_cwsr; 167 bool needs_iommu_device; 168 bool needs_pci_atomics; 169 }; 170 171 struct kfd_mem_obj { 172 uint32_t range_start; 173 uint32_t range_end; 174 uint64_t gpu_addr; 175 uint32_t *cpu_ptr; 176 }; 177 178 struct kfd_vmid_info { 179 uint32_t first_vmid_kfd; 180 uint32_t last_vmid_kfd; 181 uint32_t vmid_num_kfd; 182 }; 183 184 struct kfd_dev { 185 struct kgd_dev *kgd; 186 187 const struct kfd_device_info *device_info; 188 struct pci_dev *pdev; 189 190 unsigned int id; /* topology stub index */ 191 192 phys_addr_t doorbell_base; /* Start of actual doorbells used by 193 * KFD. It is aligned for mapping 194 * into user mode 195 */ 196 size_t doorbell_id_offset; /* Doorbell offset (from KFD doorbell 197 * to HW doorbell, GFX reserved some 198 * at the start) 199 */ 200 u32 __iomem *doorbell_kernel_ptr; /* This is a pointer for a doorbells 201 * page used by kernel queue 202 */ 203 204 struct kgd2kfd_shared_resources shared_resources; 205 struct kfd_vmid_info vm_info; 206 207 const struct kfd2kgd_calls *kfd2kgd; 208 struct mutex doorbell_mutex; 209 DECLARE_BITMAP(doorbell_available_index, 210 KFD_MAX_NUM_OF_QUEUES_PER_PROCESS); 211 212 void *gtt_mem; 213 uint64_t gtt_start_gpu_addr; 214 void *gtt_start_cpu_ptr; 215 void *gtt_sa_bitmap; 216 struct mutex gtt_sa_lock; 217 unsigned int gtt_sa_chunk_size; 218 unsigned int gtt_sa_num_of_chunks; 219 220 /* Interrupts */ 221 struct kfifo ih_fifo; 222 struct workqueue_struct *ih_wq; 223 struct work_struct interrupt_work; 224 spinlock_t interrupt_lock; 225 226 /* QCM Device instance */ 227 struct device_queue_manager *dqm; 228 229 bool init_complete; 230 /* 231 * Interrupts of interest to KFD are copied 232 * from the HW ring into a SW ring. 233 */ 234 bool interrupts_active; 235 236 /* Debug manager */ 237 struct kfd_dbgmgr *dbgmgr; 238 239 /* Maximum process number mapped to HW scheduler */ 240 unsigned int max_proc_per_quantum; 241 242 /* CWSR */ 243 bool cwsr_enabled; 244 const void *cwsr_isa; 245 unsigned int cwsr_isa_size; 246 }; 247 248 /* KGD2KFD callbacks */ 249 void kgd2kfd_exit(void); 250 struct kfd_dev *kgd2kfd_probe(struct kgd_dev *kgd, 251 struct pci_dev *pdev, const struct kfd2kgd_calls *f2g); 252 bool kgd2kfd_device_init(struct kfd_dev *kfd, 253 const struct kgd2kfd_shared_resources *gpu_resources); 254 void kgd2kfd_device_exit(struct kfd_dev *kfd); 255 256 enum kfd_mempool { 257 KFD_MEMPOOL_SYSTEM_CACHEABLE = 1, 258 KFD_MEMPOOL_SYSTEM_WRITECOMBINE = 2, 259 KFD_MEMPOOL_FRAMEBUFFER = 3, 260 }; 261 262 /* Character device interface */ 263 int kfd_chardev_init(void); 264 void kfd_chardev_exit(void); 265 struct device *kfd_chardev(void); 266 267 /** 268 * enum kfd_unmap_queues_filter 269 * 270 * @KFD_UNMAP_QUEUES_FILTER_SINGLE_QUEUE: Preempts single queue. 271 * 272 * @KFD_UNMAP_QUEUES_FILTER_ALL_QUEUES: Preempts all queues in the 273 * running queues list. 274 * 275 * @KFD_UNMAP_QUEUES_FILTER_BY_PASID: Preempts queues that belongs to 276 * specific process. 277 * 278 */ 279 enum kfd_unmap_queues_filter { 280 KFD_UNMAP_QUEUES_FILTER_SINGLE_QUEUE, 281 KFD_UNMAP_QUEUES_FILTER_ALL_QUEUES, 282 KFD_UNMAP_QUEUES_FILTER_DYNAMIC_QUEUES, 283 KFD_UNMAP_QUEUES_FILTER_BY_PASID 284 }; 285 286 /** 287 * enum kfd_queue_type 288 * 289 * @KFD_QUEUE_TYPE_COMPUTE: Regular user mode queue type. 290 * 291 * @KFD_QUEUE_TYPE_SDMA: Sdma user mode queue type. 292 * 293 * @KFD_QUEUE_TYPE_HIQ: HIQ queue type. 294 * 295 * @KFD_QUEUE_TYPE_DIQ: DIQ queue type. 296 */ 297 enum kfd_queue_type { 298 KFD_QUEUE_TYPE_COMPUTE, 299 KFD_QUEUE_TYPE_SDMA, 300 KFD_QUEUE_TYPE_HIQ, 301 KFD_QUEUE_TYPE_DIQ 302 }; 303 304 enum kfd_queue_format { 305 KFD_QUEUE_FORMAT_PM4, 306 KFD_QUEUE_FORMAT_AQL 307 }; 308 309 /** 310 * struct queue_properties 311 * 312 * @type: The queue type. 313 * 314 * @queue_id: Queue identifier. 315 * 316 * @queue_address: Queue ring buffer address. 317 * 318 * @queue_size: Queue ring buffer size. 319 * 320 * @priority: Defines the queue priority relative to other queues in the 321 * process. 322 * This is just an indication and HW scheduling may override the priority as 323 * necessary while keeping the relative prioritization. 324 * the priority granularity is from 0 to f which f is the highest priority. 325 * currently all queues are initialized with the highest priority. 326 * 327 * @queue_percent: This field is partially implemented and currently a zero in 328 * this field defines that the queue is non active. 329 * 330 * @read_ptr: User space address which points to the number of dwords the 331 * cp read from the ring buffer. This field updates automatically by the H/W. 332 * 333 * @write_ptr: Defines the number of dwords written to the ring buffer. 334 * 335 * @doorbell_ptr: This field aim is to notify the H/W of new packet written to 336 * the queue ring buffer. This field should be similar to write_ptr and the 337 * user should update this field after he updated the write_ptr. 338 * 339 * @doorbell_off: The doorbell offset in the doorbell pci-bar. 340 * 341 * @is_interop: Defines if this is a interop queue. Interop queue means that 342 * the queue can access both graphics and compute resources. 343 * 344 * @is_evicted: Defines if the queue is evicted. Only active queues 345 * are evicted, rendering them inactive. 346 * 347 * @is_active: Defines if the queue is active or not. @is_active and 348 * @is_evicted are protected by the DQM lock. 349 * 350 * @vmid: If the scheduling mode is no cp scheduling the field defines the vmid 351 * of the queue. 352 * 353 * This structure represents the queue properties for each queue no matter if 354 * it's user mode or kernel mode queue. 355 * 356 */ 357 struct queue_properties { 358 enum kfd_queue_type type; 359 enum kfd_queue_format format; 360 unsigned int queue_id; 361 uint64_t queue_address; 362 uint64_t queue_size; 363 uint32_t priority; 364 uint32_t queue_percent; 365 uint32_t *read_ptr; 366 uint32_t *write_ptr; 367 uint32_t __iomem *doorbell_ptr; 368 uint32_t doorbell_off; 369 bool is_interop; 370 bool is_evicted; 371 bool is_active; 372 /* Not relevant for user mode queues in cp scheduling */ 373 unsigned int vmid; 374 /* Relevant only for sdma queues*/ 375 uint32_t sdma_engine_id; 376 uint32_t sdma_queue_id; 377 uint32_t sdma_vm_addr; 378 /* Relevant only for VI */ 379 uint64_t eop_ring_buffer_address; 380 uint32_t eop_ring_buffer_size; 381 uint64_t ctx_save_restore_area_address; 382 uint32_t ctx_save_restore_area_size; 383 uint32_t ctl_stack_size; 384 uint64_t tba_addr; 385 uint64_t tma_addr; 386 }; 387 388 /** 389 * struct queue 390 * 391 * @list: Queue linked list. 392 * 393 * @mqd: The queue MQD. 394 * 395 * @mqd_mem_obj: The MQD local gpu memory object. 396 * 397 * @gart_mqd_addr: The MQD gart mc address. 398 * 399 * @properties: The queue properties. 400 * 401 * @mec: Used only in no cp scheduling mode and identifies to micro engine id 402 * that the queue should be execute on. 403 * 404 * @pipe: Used only in no cp scheduling mode and identifies the queue's pipe 405 * id. 406 * 407 * @queue: Used only in no cp scheduliong mode and identifies the queue's slot. 408 * 409 * @process: The kfd process that created this queue. 410 * 411 * @device: The kfd device that created this queue. 412 * 413 * This structure represents user mode compute queues. 414 * It contains all the necessary data to handle such queues. 415 * 416 */ 417 418 struct queue { 419 struct list_head list; 420 void *mqd; 421 struct kfd_mem_obj *mqd_mem_obj; 422 uint64_t gart_mqd_addr; 423 struct queue_properties properties; 424 425 uint32_t mec; 426 uint32_t pipe; 427 uint32_t queue; 428 429 unsigned int sdma_id; 430 431 struct kfd_process *process; 432 struct kfd_dev *device; 433 }; 434 435 /* 436 * Please read the kfd_mqd_manager.h description. 437 */ 438 enum KFD_MQD_TYPE { 439 KFD_MQD_TYPE_COMPUTE = 0, /* for no cp scheduling */ 440 KFD_MQD_TYPE_HIQ, /* for hiq */ 441 KFD_MQD_TYPE_CP, /* for cp queues and diq */ 442 KFD_MQD_TYPE_SDMA, /* for sdma queues */ 443 KFD_MQD_TYPE_MAX 444 }; 445 446 struct scheduling_resources { 447 unsigned int vmid_mask; 448 enum kfd_queue_type type; 449 uint64_t queue_mask; 450 uint64_t gws_mask; 451 uint32_t oac_mask; 452 uint32_t gds_heap_base; 453 uint32_t gds_heap_size; 454 }; 455 456 struct process_queue_manager { 457 /* data */ 458 struct kfd_process *process; 459 struct list_head queues; 460 unsigned long *queue_slot_bitmap; 461 }; 462 463 struct qcm_process_device { 464 /* The Device Queue Manager that owns this data */ 465 struct device_queue_manager *dqm; 466 struct process_queue_manager *pqm; 467 /* Queues list */ 468 struct list_head queues_list; 469 struct list_head priv_queue_list; 470 471 unsigned int queue_count; 472 unsigned int vmid; 473 bool is_debug; 474 unsigned int evicted; /* eviction counter, 0=active */ 475 476 /* This flag tells if we should reset all wavefronts on 477 * process termination 478 */ 479 bool reset_wavefronts; 480 481 /* 482 * All the memory management data should be here too 483 */ 484 uint64_t gds_context_area; 485 uint32_t sh_mem_config; 486 uint32_t sh_mem_bases; 487 uint32_t sh_mem_ape1_base; 488 uint32_t sh_mem_ape1_limit; 489 uint32_t page_table_base; 490 uint32_t gds_size; 491 uint32_t num_gws; 492 uint32_t num_oac; 493 uint32_t sh_hidden_private_base; 494 495 /* CWSR memory */ 496 void *cwsr_kaddr; 497 uint64_t cwsr_base; 498 uint64_t tba_addr; 499 uint64_t tma_addr; 500 501 /* IB memory */ 502 uint64_t ib_base; 503 void *ib_kaddr; 504 }; 505 506 /* KFD Memory Eviction */ 507 508 /* Approx. wait time before attempting to restore evicted BOs */ 509 #define PROCESS_RESTORE_TIME_MS 100 510 /* Approx. back off time if restore fails due to lack of memory */ 511 #define PROCESS_BACK_OFF_TIME_MS 100 512 /* Approx. time before evicting the process again */ 513 #define PROCESS_ACTIVE_TIME_MS 10 514 515 int kgd2kfd_schedule_evict_and_restore_process(struct mm_struct *mm, 516 struct dma_fence *fence); 517 518 /* 8 byte handle containing GPU ID in the most significant 4 bytes and 519 * idr_handle in the least significant 4 bytes 520 */ 521 #define MAKE_HANDLE(gpu_id, idr_handle) \ 522 (((uint64_t)(gpu_id) << 32) + idr_handle) 523 #define GET_GPU_ID(handle) (handle >> 32) 524 #define GET_IDR_HANDLE(handle) (handle & 0xFFFFFFFF) 525 526 enum kfd_pdd_bound { 527 PDD_UNBOUND = 0, 528 PDD_BOUND, 529 PDD_BOUND_SUSPENDED, 530 }; 531 532 /* Data that is per-process-per device. */ 533 struct kfd_process_device { 534 /* 535 * List of all per-device data for a process. 536 * Starts from kfd_process.per_device_data. 537 */ 538 struct list_head per_device_list; 539 540 /* The device that owns this data. */ 541 struct kfd_dev *dev; 542 543 /* The process that owns this kfd_process_device. */ 544 struct kfd_process *process; 545 546 /* per-process-per device QCM data structure */ 547 struct qcm_process_device qpd; 548 549 /*Apertures*/ 550 uint64_t lds_base; 551 uint64_t lds_limit; 552 uint64_t gpuvm_base; 553 uint64_t gpuvm_limit; 554 uint64_t scratch_base; 555 uint64_t scratch_limit; 556 557 /* VM context for GPUVM allocations */ 558 struct file *drm_file; 559 void *vm; 560 561 /* GPUVM allocations storage */ 562 struct idr alloc_idr; 563 564 /* Flag used to tell the pdd has dequeued from the dqm. 565 * This is used to prevent dev->dqm->ops.process_termination() from 566 * being called twice when it is already called in IOMMU callback 567 * function. 568 */ 569 bool already_dequeued; 570 571 /* Is this process/pasid bound to this device? (amd_iommu_bind_pasid) */ 572 enum kfd_pdd_bound bound; 573 }; 574 575 #define qpd_to_pdd(x) container_of(x, struct kfd_process_device, qpd) 576 577 /* Process data */ 578 struct kfd_process { 579 /* 580 * kfd_process are stored in an mm_struct*->kfd_process* 581 * hash table (kfd_processes in kfd_process.c) 582 */ 583 struct hlist_node kfd_processes; 584 585 /* 586 * Opaque pointer to mm_struct. We don't hold a reference to 587 * it so it should never be dereferenced from here. This is 588 * only used for looking up processes by their mm. 589 */ 590 void *mm; 591 592 struct kref ref; 593 struct work_struct release_work; 594 595 struct mutex mutex; 596 597 /* 598 * In any process, the thread that started main() is the lead 599 * thread and outlives the rest. 600 * It is here because amd_iommu_bind_pasid wants a task_struct. 601 * It can also be used for safely getting a reference to the 602 * mm_struct of the process. 603 */ 604 struct task_struct *lead_thread; 605 606 /* We want to receive a notification when the mm_struct is destroyed */ 607 struct mmu_notifier mmu_notifier; 608 609 /* Use for delayed freeing of kfd_process structure */ 610 struct rcu_head rcu; 611 612 unsigned int pasid; 613 unsigned int doorbell_index; 614 615 /* 616 * List of kfd_process_device structures, 617 * one for each device the process is using. 618 */ 619 struct list_head per_device_data; 620 621 struct process_queue_manager pqm; 622 623 /*Is the user space process 32 bit?*/ 624 bool is_32bit_user_mode; 625 626 /* Event-related data */ 627 struct mutex event_mutex; 628 /* Event ID allocator and lookup */ 629 struct idr event_idr; 630 /* Event page */ 631 struct kfd_signal_page *signal_page; 632 size_t signal_mapped_size; 633 size_t signal_event_count; 634 bool signal_event_limit_reached; 635 636 /* Information used for memory eviction */ 637 void *kgd_process_info; 638 /* Eviction fence that is attached to all the BOs of this process. The 639 * fence will be triggered during eviction and new one will be created 640 * during restore 641 */ 642 struct dma_fence *ef; 643 644 /* Work items for evicting and restoring BOs */ 645 struct delayed_work eviction_work; 646 struct delayed_work restore_work; 647 /* seqno of the last scheduled eviction */ 648 unsigned int last_eviction_seqno; 649 /* Approx. the last timestamp (in jiffies) when the process was 650 * restored after an eviction 651 */ 652 unsigned long last_restore_timestamp; 653 }; 654 655 #define KFD_PROCESS_TABLE_SIZE 5 /* bits: 32 entries */ 656 extern DECLARE_HASHTABLE(kfd_processes_table, KFD_PROCESS_TABLE_SIZE); 657 extern struct srcu_struct kfd_processes_srcu; 658 659 /** 660 * Ioctl function type. 661 * 662 * \param filep pointer to file structure. 663 * \param p amdkfd process pointer. 664 * \param data pointer to arg that was copied from user. 665 */ 666 typedef int amdkfd_ioctl_t(struct file *filep, struct kfd_process *p, 667 void *data); 668 669 struct amdkfd_ioctl_desc { 670 unsigned int cmd; 671 int flags; 672 amdkfd_ioctl_t *func; 673 unsigned int cmd_drv; 674 const char *name; 675 }; 676 677 int kfd_process_create_wq(void); 678 void kfd_process_destroy_wq(void); 679 struct kfd_process *kfd_create_process(struct file *filep); 680 struct kfd_process *kfd_get_process(const struct task_struct *); 681 struct kfd_process *kfd_lookup_process_by_pasid(unsigned int pasid); 682 struct kfd_process *kfd_lookup_process_by_mm(const struct mm_struct *mm); 683 void kfd_unref_process(struct kfd_process *p); 684 void kfd_suspend_all_processes(void); 685 int kfd_resume_all_processes(void); 686 687 int kfd_process_device_init_vm(struct kfd_process_device *pdd, 688 struct file *drm_file); 689 struct kfd_process_device *kfd_bind_process_to_device(struct kfd_dev *dev, 690 struct kfd_process *p); 691 struct kfd_process_device *kfd_get_process_device_data(struct kfd_dev *dev, 692 struct kfd_process *p); 693 struct kfd_process_device *kfd_create_process_device_data(struct kfd_dev *dev, 694 struct kfd_process *p); 695 696 int kfd_reserved_mem_mmap(struct kfd_process *process, 697 struct vm_area_struct *vma); 698 699 /* KFD process API for creating and translating handles */ 700 int kfd_process_device_create_obj_handle(struct kfd_process_device *pdd, 701 void *mem); 702 void *kfd_process_device_translate_handle(struct kfd_process_device *p, 703 int handle); 704 void kfd_process_device_remove_obj_handle(struct kfd_process_device *pdd, 705 int handle); 706 707 /* Process device data iterator */ 708 struct kfd_process_device *kfd_get_first_process_device_data( 709 struct kfd_process *p); 710 struct kfd_process_device *kfd_get_next_process_device_data( 711 struct kfd_process *p, 712 struct kfd_process_device *pdd); 713 bool kfd_has_process_device_data(struct kfd_process *p); 714 715 /* PASIDs */ 716 int kfd_pasid_init(void); 717 void kfd_pasid_exit(void); 718 bool kfd_set_pasid_limit(unsigned int new_limit); 719 unsigned int kfd_get_pasid_limit(void); 720 unsigned int kfd_pasid_alloc(void); 721 void kfd_pasid_free(unsigned int pasid); 722 723 /* Doorbells */ 724 int kfd_doorbell_init(struct kfd_dev *kfd); 725 void kfd_doorbell_fini(struct kfd_dev *kfd); 726 int kfd_doorbell_mmap(struct kfd_process *process, struct vm_area_struct *vma); 727 u32 __iomem *kfd_get_kernel_doorbell(struct kfd_dev *kfd, 728 unsigned int *doorbell_off); 729 void kfd_release_kernel_doorbell(struct kfd_dev *kfd, u32 __iomem *db_addr); 730 u32 read_kernel_doorbell(u32 __iomem *db); 731 void write_kernel_doorbell(u32 __iomem *db, u32 value); 732 unsigned int kfd_queue_id_to_doorbell(struct kfd_dev *kfd, 733 struct kfd_process *process, 734 unsigned int queue_id); 735 phys_addr_t kfd_get_process_doorbells(struct kfd_dev *dev, 736 struct kfd_process *process); 737 int kfd_alloc_process_doorbells(struct kfd_process *process); 738 void kfd_free_process_doorbells(struct kfd_process *process); 739 740 /* GTT Sub-Allocator */ 741 742 int kfd_gtt_sa_allocate(struct kfd_dev *kfd, unsigned int size, 743 struct kfd_mem_obj **mem_obj); 744 745 int kfd_gtt_sa_free(struct kfd_dev *kfd, struct kfd_mem_obj *mem_obj); 746 747 extern struct device *kfd_device; 748 749 /* Topology */ 750 int kfd_topology_init(void); 751 void kfd_topology_shutdown(void); 752 int kfd_topology_add_device(struct kfd_dev *gpu); 753 int kfd_topology_remove_device(struct kfd_dev *gpu); 754 struct kfd_topology_device *kfd_topology_device_by_proximity_domain( 755 uint32_t proximity_domain); 756 struct kfd_dev *kfd_device_by_id(uint32_t gpu_id); 757 struct kfd_dev *kfd_device_by_pci_dev(const struct pci_dev *pdev); 758 int kfd_topology_enum_kfd_devices(uint8_t idx, struct kfd_dev **kdev); 759 int kfd_numa_node_to_apic_id(int numa_node_id); 760 761 /* Interrupts */ 762 int kfd_interrupt_init(struct kfd_dev *dev); 763 void kfd_interrupt_exit(struct kfd_dev *dev); 764 void kgd2kfd_interrupt(struct kfd_dev *kfd, const void *ih_ring_entry); 765 bool enqueue_ih_ring_entry(struct kfd_dev *kfd, const void *ih_ring_entry); 766 bool interrupt_is_wanted(struct kfd_dev *dev, const uint32_t *ih_ring_entry); 767 768 /* Power Management */ 769 void kgd2kfd_suspend(struct kfd_dev *kfd); 770 int kgd2kfd_resume(struct kfd_dev *kfd); 771 772 /* amdkfd Apertures */ 773 int kfd_init_apertures(struct kfd_process *process); 774 775 /* Queue Context Management */ 776 int init_queue(struct queue **q, const struct queue_properties *properties); 777 void uninit_queue(struct queue *q); 778 void print_queue_properties(struct queue_properties *q); 779 void print_queue(struct queue *q); 780 781 struct mqd_manager *mqd_manager_init(enum KFD_MQD_TYPE type, 782 struct kfd_dev *dev); 783 struct mqd_manager *mqd_manager_init_cik(enum KFD_MQD_TYPE type, 784 struct kfd_dev *dev); 785 struct mqd_manager *mqd_manager_init_cik_hawaii(enum KFD_MQD_TYPE type, 786 struct kfd_dev *dev); 787 struct mqd_manager *mqd_manager_init_vi(enum KFD_MQD_TYPE type, 788 struct kfd_dev *dev); 789 struct mqd_manager *mqd_manager_init_vi_tonga(enum KFD_MQD_TYPE type, 790 struct kfd_dev *dev); 791 struct device_queue_manager *device_queue_manager_init(struct kfd_dev *dev); 792 void device_queue_manager_uninit(struct device_queue_manager *dqm); 793 struct kernel_queue *kernel_queue_init(struct kfd_dev *dev, 794 enum kfd_queue_type type); 795 void kernel_queue_uninit(struct kernel_queue *kq); 796 797 /* Process Queue Manager */ 798 struct process_queue_node { 799 struct queue *q; 800 struct kernel_queue *kq; 801 struct list_head process_queue_list; 802 }; 803 804 void kfd_process_dequeue_from_device(struct kfd_process_device *pdd); 805 void kfd_process_dequeue_from_all_devices(struct kfd_process *p); 806 int pqm_init(struct process_queue_manager *pqm, struct kfd_process *p); 807 void pqm_uninit(struct process_queue_manager *pqm); 808 int pqm_create_queue(struct process_queue_manager *pqm, 809 struct kfd_dev *dev, 810 struct file *f, 811 struct queue_properties *properties, 812 unsigned int *qid); 813 int pqm_destroy_queue(struct process_queue_manager *pqm, unsigned int qid); 814 int pqm_update_queue(struct process_queue_manager *pqm, unsigned int qid, 815 struct queue_properties *p); 816 struct kernel_queue *pqm_get_kernel_queue(struct process_queue_manager *pqm, 817 unsigned int qid); 818 819 int amdkfd_fence_wait_timeout(unsigned int *fence_addr, 820 unsigned int fence_value, 821 unsigned int timeout_ms); 822 823 /* Packet Manager */ 824 825 #define KFD_FENCE_COMPLETED (100) 826 #define KFD_FENCE_INIT (10) 827 828 struct packet_manager { 829 struct device_queue_manager *dqm; 830 struct kernel_queue *priv_queue; 831 struct mutex lock; 832 bool allocated; 833 struct kfd_mem_obj *ib_buffer_obj; 834 unsigned int ib_size_bytes; 835 }; 836 837 int pm_init(struct packet_manager *pm, struct device_queue_manager *dqm); 838 void pm_uninit(struct packet_manager *pm); 839 int pm_send_set_resources(struct packet_manager *pm, 840 struct scheduling_resources *res); 841 int pm_send_runlist(struct packet_manager *pm, struct list_head *dqm_queues); 842 int pm_send_query_status(struct packet_manager *pm, uint64_t fence_address, 843 uint32_t fence_value); 844 845 int pm_send_unmap_queue(struct packet_manager *pm, enum kfd_queue_type type, 846 enum kfd_unmap_queues_filter mode, 847 uint32_t filter_param, bool reset, 848 unsigned int sdma_engine); 849 850 void pm_release_ib(struct packet_manager *pm); 851 852 uint32_t pm_create_release_mem(uint64_t gpu_addr, uint32_t *buffer); 853 854 uint64_t kfd_get_number_elems(struct kfd_dev *kfd); 855 856 /* Events */ 857 extern const struct kfd_event_interrupt_class event_interrupt_class_cik; 858 extern const struct kfd_device_global_init_class device_global_init_class_cik; 859 860 void kfd_event_init_process(struct kfd_process *p); 861 void kfd_event_free_process(struct kfd_process *p); 862 int kfd_event_mmap(struct kfd_process *process, struct vm_area_struct *vma); 863 int kfd_wait_on_events(struct kfd_process *p, 864 uint32_t num_events, void __user *data, 865 bool all, uint32_t user_timeout_ms, 866 uint32_t *wait_result); 867 void kfd_signal_event_interrupt(unsigned int pasid, uint32_t partial_id, 868 uint32_t valid_id_bits); 869 void kfd_signal_iommu_event(struct kfd_dev *dev, 870 unsigned int pasid, unsigned long address, 871 bool is_write_requested, bool is_execute_requested); 872 void kfd_signal_hw_exception_event(unsigned int pasid); 873 int kfd_set_event(struct kfd_process *p, uint32_t event_id); 874 int kfd_reset_event(struct kfd_process *p, uint32_t event_id); 875 int kfd_event_page_set(struct kfd_process *p, void *kernel_address, 876 uint64_t size); 877 int kfd_event_create(struct file *devkfd, struct kfd_process *p, 878 uint32_t event_type, bool auto_reset, uint32_t node_id, 879 uint32_t *event_id, uint32_t *event_trigger_data, 880 uint64_t *event_page_offset, uint32_t *event_slot_index); 881 int kfd_event_destroy(struct kfd_process *p, uint32_t event_id); 882 883 void kfd_flush_tlb(struct kfd_process_device *pdd); 884 885 int dbgdev_wave_reset_wavefronts(struct kfd_dev *dev, struct kfd_process *p); 886 887 /* Debugfs */ 888 #if defined(CONFIG_DEBUG_FS) 889 890 void kfd_debugfs_init(void); 891 void kfd_debugfs_fini(void); 892 int kfd_debugfs_mqds_by_process(struct seq_file *m, void *data); 893 int pqm_debugfs_mqds(struct seq_file *m, void *data); 894 int kfd_debugfs_hqds_by_device(struct seq_file *m, void *data); 895 int dqm_debugfs_hqds(struct seq_file *m, void *data); 896 int kfd_debugfs_rls_by_device(struct seq_file *m, void *data); 897 int pm_debugfs_runlist(struct seq_file *m, void *data); 898 899 #else 900 901 static inline void kfd_debugfs_init(void) {} 902 static inline void kfd_debugfs_fini(void) {} 903 904 #endif 905 906 #endif 907