1 /* 2 * Copyright 2014 Advanced Micro Devices, Inc. 3 * 4 * Permission is hereby granted, free of charge, to any person obtaining a 5 * copy of this software and associated documentation files (the "Software"), 6 * to deal in the Software without restriction, including without limitation 7 * the rights to use, copy, modify, merge, publish, distribute, sublicense, 8 * and/or sell copies of the Software, and to permit persons to whom the 9 * Software is furnished to do so, subject to the following conditions: 10 * 11 * The above copyright notice and this permission notice shall be included in 12 * all copies or substantial portions of the Software. 13 * 14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 15 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 17 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR 18 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, 19 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR 20 * OTHER DEALINGS IN THE SOFTWARE. 21 */ 22 23 #ifndef KFD_PRIV_H_INCLUDED 24 #define KFD_PRIV_H_INCLUDED 25 26 #include <linux/hashtable.h> 27 #include <linux/mmu_notifier.h> 28 #include <linux/mutex.h> 29 #include <linux/types.h> 30 #include <linux/atomic.h> 31 #include <linux/workqueue.h> 32 #include <linux/spinlock.h> 33 #include <linux/kfd_ioctl.h> 34 #include <kgd_kfd_interface.h> 35 36 #define KFD_SYSFS_FILE_MODE 0444 37 38 /* 39 * When working with cp scheduler we should assign the HIQ manually or via 40 * the radeon driver to a fixed hqd slot, here are the fixed HIQ hqd slot 41 * definitions for Kaveri. In Kaveri only the first ME queues participates 42 * in the cp scheduling taking that in mind we set the HIQ slot in the 43 * second ME. 44 */ 45 #define KFD_CIK_HIQ_PIPE 4 46 #define KFD_CIK_HIQ_QUEUE 0 47 48 /* GPU ID hash width in bits */ 49 #define KFD_GPU_ID_HASH_WIDTH 16 50 51 /* Macro for allocating structures */ 52 #define kfd_alloc_struct(ptr_to_struct) \ 53 ((typeof(ptr_to_struct)) kzalloc(sizeof(*ptr_to_struct), GFP_KERNEL)) 54 55 /* Kernel module parameter to specify maximum number of supported processes */ 56 extern int max_num_of_processes; 57 58 #define KFD_MAX_NUM_OF_PROCESSES_DEFAULT 32 59 #define KFD_MAX_NUM_OF_PROCESSES 512 60 61 /* 62 * Kernel module parameter to specify maximum number of supported queues 63 * per process 64 */ 65 extern int max_num_of_queues_per_process; 66 67 #define KFD_MAX_NUM_OF_QUEUES_PER_PROCESS_DEFAULT 128 68 #define KFD_MAX_NUM_OF_QUEUES_PER_PROCESS 1024 69 70 #define KFD_KERNEL_QUEUE_SIZE 2048 71 72 /* Kernel module parameter to specify the scheduling policy */ 73 extern int sched_policy; 74 75 /** 76 * enum kfd_sched_policy 77 * 78 * @KFD_SCHED_POLICY_HWS: H/W scheduling policy known as command processor (cp) 79 * scheduling. In this scheduling mode we're using the firmware code to 80 * schedule the user mode queues and kernel queues such as HIQ and DIQ. 81 * the HIQ queue is used as a special queue that dispatches the configuration 82 * to the cp and the user mode queues list that are currently running. 83 * the DIQ queue is a debugging queue that dispatches debugging commands to the 84 * firmware. 85 * in this scheduling mode user mode queues over subscription feature is 86 * enabled. 87 * 88 * @KFD_SCHED_POLICY_HWS_NO_OVERSUBSCRIPTION: The same as above but the over 89 * subscription feature disabled. 90 * 91 * @KFD_SCHED_POLICY_NO_HWS: no H/W scheduling policy is a mode which directly 92 * set the command processor registers and sets the queues "manually". This 93 * mode is used *ONLY* for debugging proposes. 94 * 95 */ 96 enum kfd_sched_policy { 97 KFD_SCHED_POLICY_HWS = 0, 98 KFD_SCHED_POLICY_HWS_NO_OVERSUBSCRIPTION, 99 KFD_SCHED_POLICY_NO_HWS 100 }; 101 102 enum cache_policy { 103 cache_policy_coherent, 104 cache_policy_noncoherent 105 }; 106 107 struct kfd_device_info { 108 unsigned int max_pasid_bits; 109 size_t ih_ring_entry_size; 110 uint16_t mqd_size_aligned; 111 }; 112 113 struct kfd_dev { 114 struct kgd_dev *kgd; 115 116 const struct kfd_device_info *device_info; 117 struct pci_dev *pdev; 118 119 unsigned int id; /* topology stub index */ 120 121 phys_addr_t doorbell_base; /* Start of actual doorbells used by 122 * KFD. It is aligned for mapping 123 * into user mode 124 */ 125 size_t doorbell_id_offset; /* Doorbell offset (from KFD doorbell 126 * to HW doorbell, GFX reserved some 127 * at the start) 128 */ 129 size_t doorbell_process_limit; /* Number of processes we have doorbell 130 * space for. 131 */ 132 u32 __iomem *doorbell_kernel_ptr; /* This is a pointer for a doorbells 133 * page used by kernel queue 134 */ 135 136 struct kgd2kfd_shared_resources shared_resources; 137 138 void *interrupt_ring; 139 size_t interrupt_ring_size; 140 atomic_t interrupt_ring_rptr; 141 atomic_t interrupt_ring_wptr; 142 struct work_struct interrupt_work; 143 spinlock_t interrupt_lock; 144 145 /* QCM Device instance */ 146 struct device_queue_manager *dqm; 147 148 bool init_complete; 149 /* 150 * Interrupts of interest to KFD are copied 151 * from the HW ring into a SW ring. 152 */ 153 bool interrupts_active; 154 }; 155 156 /* KGD2KFD callbacks */ 157 void kgd2kfd_exit(void); 158 struct kfd_dev *kgd2kfd_probe(struct kgd_dev *kgd, struct pci_dev *pdev); 159 bool kgd2kfd_device_init(struct kfd_dev *kfd, 160 const struct kgd2kfd_shared_resources *gpu_resources); 161 void kgd2kfd_device_exit(struct kfd_dev *kfd); 162 163 extern const struct kfd2kgd_calls *kfd2kgd; 164 165 struct kfd_mem_obj { 166 void *bo; 167 uint64_t gpu_addr; 168 uint32_t *cpu_ptr; 169 }; 170 171 enum kfd_mempool { 172 KFD_MEMPOOL_SYSTEM_CACHEABLE = 1, 173 KFD_MEMPOOL_SYSTEM_WRITECOMBINE = 2, 174 KFD_MEMPOOL_FRAMEBUFFER = 3, 175 }; 176 177 /* Character device interface */ 178 int kfd_chardev_init(void); 179 void kfd_chardev_exit(void); 180 struct device *kfd_chardev(void); 181 182 /** 183 * enum kfd_preempt_type_filter 184 * 185 * @KFD_PREEMPT_TYPE_FILTER_SINGLE_QUEUE: Preempts single queue. 186 * 187 * @KFD_PRERMPT_TYPE_FILTER_ALL_QUEUES: Preempts all queues in the 188 * running queues list. 189 * 190 * @KFD_PRERMPT_TYPE_FILTER_BY_PASID: Preempts queues that belongs to 191 * specific process. 192 * 193 */ 194 enum kfd_preempt_type_filter { 195 KFD_PREEMPT_TYPE_FILTER_SINGLE_QUEUE, 196 KFD_PREEMPT_TYPE_FILTER_ALL_QUEUES, 197 KFD_PREEMPT_TYPE_FILTER_BY_PASID 198 }; 199 200 enum kfd_preempt_type { 201 KFD_PREEMPT_TYPE_WAVEFRONT, 202 KFD_PREEMPT_TYPE_WAVEFRONT_RESET 203 }; 204 205 /** 206 * enum kfd_queue_type 207 * 208 * @KFD_QUEUE_TYPE_COMPUTE: Regular user mode queue type. 209 * 210 * @KFD_QUEUE_TYPE_SDMA: Sdma user mode queue type. 211 * 212 * @KFD_QUEUE_TYPE_HIQ: HIQ queue type. 213 * 214 * @KFD_QUEUE_TYPE_DIQ: DIQ queue type. 215 */ 216 enum kfd_queue_type { 217 KFD_QUEUE_TYPE_COMPUTE, 218 KFD_QUEUE_TYPE_SDMA, 219 KFD_QUEUE_TYPE_HIQ, 220 KFD_QUEUE_TYPE_DIQ 221 }; 222 223 enum kfd_queue_format { 224 KFD_QUEUE_FORMAT_PM4, 225 KFD_QUEUE_FORMAT_AQL 226 }; 227 228 /** 229 * struct queue_properties 230 * 231 * @type: The queue type. 232 * 233 * @queue_id: Queue identifier. 234 * 235 * @queue_address: Queue ring buffer address. 236 * 237 * @queue_size: Queue ring buffer size. 238 * 239 * @priority: Defines the queue priority relative to other queues in the 240 * process. 241 * This is just an indication and HW scheduling may override the priority as 242 * necessary while keeping the relative prioritization. 243 * the priority granularity is from 0 to f which f is the highest priority. 244 * currently all queues are initialized with the highest priority. 245 * 246 * @queue_percent: This field is partially implemented and currently a zero in 247 * this field defines that the queue is non active. 248 * 249 * @read_ptr: User space address which points to the number of dwords the 250 * cp read from the ring buffer. This field updates automatically by the H/W. 251 * 252 * @write_ptr: Defines the number of dwords written to the ring buffer. 253 * 254 * @doorbell_ptr: This field aim is to notify the H/W of new packet written to 255 * the queue ring buffer. This field should be similar to write_ptr and the user 256 * should update this field after he updated the write_ptr. 257 * 258 * @doorbell_off: The doorbell offset in the doorbell pci-bar. 259 * 260 * @is_interop: Defines if this is a interop queue. Interop queue means that the 261 * queue can access both graphics and compute resources. 262 * 263 * @is_active: Defines if the queue is active or not. 264 * 265 * @vmid: If the scheduling mode is no cp scheduling the field defines the vmid 266 * of the queue. 267 * 268 * This structure represents the queue properties for each queue no matter if 269 * it's user mode or kernel mode queue. 270 * 271 */ 272 struct queue_properties { 273 enum kfd_queue_type type; 274 enum kfd_queue_format format; 275 unsigned int queue_id; 276 uint64_t queue_address; 277 uint64_t queue_size; 278 uint32_t priority; 279 uint32_t queue_percent; 280 uint32_t *read_ptr; 281 uint32_t *write_ptr; 282 uint32_t __iomem *doorbell_ptr; 283 uint32_t doorbell_off; 284 bool is_interop; 285 bool is_active; 286 /* Not relevant for user mode queues in cp scheduling */ 287 unsigned int vmid; 288 }; 289 290 /** 291 * struct queue 292 * 293 * @list: Queue linked list. 294 * 295 * @mqd: The queue MQD. 296 * 297 * @mqd_mem_obj: The MQD local gpu memory object. 298 * 299 * @gart_mqd_addr: The MQD gart mc address. 300 * 301 * @properties: The queue properties. 302 * 303 * @mec: Used only in no cp scheduling mode and identifies to micro engine id 304 * that the queue should be execute on. 305 * 306 * @pipe: Used only in no cp scheduling mode and identifies the queue's pipe id. 307 * 308 * @queue: Used only in no cp scheduliong mode and identifies the queue's slot. 309 * 310 * @process: The kfd process that created this queue. 311 * 312 * @device: The kfd device that created this queue. 313 * 314 * This structure represents user mode compute queues. 315 * It contains all the necessary data to handle such queues. 316 * 317 */ 318 319 struct queue { 320 struct list_head list; 321 void *mqd; 322 struct kfd_mem_obj *mqd_mem_obj; 323 uint64_t gart_mqd_addr; 324 struct queue_properties properties; 325 326 uint32_t mec; 327 uint32_t pipe; 328 uint32_t queue; 329 330 struct kfd_process *process; 331 struct kfd_dev *device; 332 }; 333 334 /* 335 * Please read the kfd_mqd_manager.h description. 336 */ 337 enum KFD_MQD_TYPE { 338 KFD_MQD_TYPE_CIK_COMPUTE = 0, /* for no cp scheduling */ 339 KFD_MQD_TYPE_CIK_HIQ, /* for hiq */ 340 KFD_MQD_TYPE_CIK_CP, /* for cp queues and diq */ 341 KFD_MQD_TYPE_CIK_SDMA, /* for sdma queues */ 342 KFD_MQD_TYPE_MAX 343 }; 344 345 struct scheduling_resources { 346 unsigned int vmid_mask; 347 enum kfd_queue_type type; 348 uint64_t queue_mask; 349 uint64_t gws_mask; 350 uint32_t oac_mask; 351 uint32_t gds_heap_base; 352 uint32_t gds_heap_size; 353 }; 354 355 struct process_queue_manager { 356 /* data */ 357 struct kfd_process *process; 358 unsigned int num_concurrent_processes; 359 struct list_head queues; 360 unsigned long *queue_slot_bitmap; 361 }; 362 363 struct qcm_process_device { 364 /* The Device Queue Manager that owns this data */ 365 struct device_queue_manager *dqm; 366 struct process_queue_manager *pqm; 367 /* Device Queue Manager lock */ 368 struct mutex *lock; 369 /* Queues list */ 370 struct list_head queues_list; 371 struct list_head priv_queue_list; 372 373 unsigned int queue_count; 374 unsigned int vmid; 375 bool is_debug; 376 /* 377 * All the memory management data should be here too 378 */ 379 uint64_t gds_context_area; 380 uint32_t sh_mem_config; 381 uint32_t sh_mem_bases; 382 uint32_t sh_mem_ape1_base; 383 uint32_t sh_mem_ape1_limit; 384 uint32_t page_table_base; 385 uint32_t gds_size; 386 uint32_t num_gws; 387 uint32_t num_oac; 388 }; 389 390 /* Data that is per-process-per device. */ 391 struct kfd_process_device { 392 /* 393 * List of all per-device data for a process. 394 * Starts from kfd_process.per_device_data. 395 */ 396 struct list_head per_device_list; 397 398 /* The device that owns this data. */ 399 struct kfd_dev *dev; 400 401 402 /* per-process-per device QCM data structure */ 403 struct qcm_process_device qpd; 404 405 /*Apertures*/ 406 uint64_t lds_base; 407 uint64_t lds_limit; 408 uint64_t gpuvm_base; 409 uint64_t gpuvm_limit; 410 uint64_t scratch_base; 411 uint64_t scratch_limit; 412 413 /* Is this process/pasid bound to this device? (amd_iommu_bind_pasid) */ 414 bool bound; 415 }; 416 417 #define qpd_to_pdd(x) container_of(x, struct kfd_process_device, qpd) 418 419 /* Process data */ 420 struct kfd_process { 421 /* 422 * kfd_process are stored in an mm_struct*->kfd_process* 423 * hash table (kfd_processes in kfd_process.c) 424 */ 425 struct hlist_node kfd_processes; 426 427 struct mm_struct *mm; 428 429 struct mutex mutex; 430 431 /* 432 * In any process, the thread that started main() is the lead 433 * thread and outlives the rest. 434 * It is here because amd_iommu_bind_pasid wants a task_struct. 435 */ 436 struct task_struct *lead_thread; 437 438 /* We want to receive a notification when the mm_struct is destroyed */ 439 struct mmu_notifier mmu_notifier; 440 441 /* Use for delayed freeing of kfd_process structure */ 442 struct rcu_head rcu; 443 444 unsigned int pasid; 445 446 /* 447 * List of kfd_process_device structures, 448 * one for each device the process is using. 449 */ 450 struct list_head per_device_data; 451 452 struct process_queue_manager pqm; 453 454 /* The process's queues. */ 455 size_t queue_array_size; 456 457 /* Size is queue_array_size, up to MAX_PROCESS_QUEUES. */ 458 struct kfd_queue **queues; 459 460 unsigned long allocated_queue_bitmap[DIV_ROUND_UP(KFD_MAX_NUM_OF_QUEUES_PER_PROCESS, BITS_PER_LONG)]; 461 462 /*Is the user space process 32 bit?*/ 463 bool is_32bit_user_mode; 464 }; 465 466 /** 467 * Ioctl function type. 468 * 469 * \param filep pointer to file structure. 470 * \param p amdkfd process pointer. 471 * \param data pointer to arg that was copied from user. 472 */ 473 typedef int amdkfd_ioctl_t(struct file *filep, struct kfd_process *p, 474 void *data); 475 476 struct amdkfd_ioctl_desc { 477 unsigned int cmd; 478 int flags; 479 amdkfd_ioctl_t *func; 480 unsigned int cmd_drv; 481 const char *name; 482 }; 483 484 void kfd_process_create_wq(void); 485 void kfd_process_destroy_wq(void); 486 struct kfd_process *kfd_create_process(const struct task_struct *); 487 struct kfd_process *kfd_get_process(const struct task_struct *); 488 489 struct kfd_process_device *kfd_bind_process_to_device(struct kfd_dev *dev, 490 struct kfd_process *p); 491 void kfd_unbind_process_from_device(struct kfd_dev *dev, unsigned int pasid); 492 struct kfd_process_device *kfd_get_process_device_data(struct kfd_dev *dev, 493 struct kfd_process *p, 494 int create_pdd); 495 496 /* Process device data iterator */ 497 struct kfd_process_device *kfd_get_first_process_device_data(struct kfd_process *p); 498 struct kfd_process_device *kfd_get_next_process_device_data(struct kfd_process *p, 499 struct kfd_process_device *pdd); 500 bool kfd_has_process_device_data(struct kfd_process *p); 501 502 /* PASIDs */ 503 int kfd_pasid_init(void); 504 void kfd_pasid_exit(void); 505 bool kfd_set_pasid_limit(unsigned int new_limit); 506 unsigned int kfd_get_pasid_limit(void); 507 unsigned int kfd_pasid_alloc(void); 508 void kfd_pasid_free(unsigned int pasid); 509 510 /* Doorbells */ 511 void kfd_doorbell_init(struct kfd_dev *kfd); 512 int kfd_doorbell_mmap(struct kfd_process *process, struct vm_area_struct *vma); 513 u32 __iomem *kfd_get_kernel_doorbell(struct kfd_dev *kfd, 514 unsigned int *doorbell_off); 515 void kfd_release_kernel_doorbell(struct kfd_dev *kfd, u32 __iomem *db_addr); 516 u32 read_kernel_doorbell(u32 __iomem *db); 517 void write_kernel_doorbell(u32 __iomem *db, u32 value); 518 unsigned int kfd_queue_id_to_doorbell(struct kfd_dev *kfd, 519 struct kfd_process *process, 520 unsigned int queue_id); 521 522 extern struct device *kfd_device; 523 524 /* Topology */ 525 int kfd_topology_init(void); 526 void kfd_topology_shutdown(void); 527 int kfd_topology_add_device(struct kfd_dev *gpu); 528 int kfd_topology_remove_device(struct kfd_dev *gpu); 529 struct kfd_dev *kfd_device_by_id(uint32_t gpu_id); 530 struct kfd_dev *kfd_device_by_pci_dev(const struct pci_dev *pdev); 531 struct kfd_dev *kfd_topology_enum_kfd_devices(uint8_t idx); 532 533 /* Interrupts */ 534 int kfd_interrupt_init(struct kfd_dev *dev); 535 void kfd_interrupt_exit(struct kfd_dev *dev); 536 void kgd2kfd_interrupt(struct kfd_dev *kfd, const void *ih_ring_entry); 537 bool enqueue_ih_ring_entry(struct kfd_dev *kfd, const void *ih_ring_entry); 538 539 /* Power Management */ 540 void kgd2kfd_suspend(struct kfd_dev *kfd); 541 int kgd2kfd_resume(struct kfd_dev *kfd); 542 543 /* amdkfd Apertures */ 544 int kfd_init_apertures(struct kfd_process *process); 545 546 /* Queue Context Management */ 547 inline uint32_t lower_32(uint64_t x); 548 inline uint32_t upper_32(uint64_t x); 549 550 int init_queue(struct queue **q, struct queue_properties properties); 551 void uninit_queue(struct queue *q); 552 void print_queue_properties(struct queue_properties *q); 553 void print_queue(struct queue *q); 554 555 struct mqd_manager *mqd_manager_init(enum KFD_MQD_TYPE type, 556 struct kfd_dev *dev); 557 struct device_queue_manager *device_queue_manager_init(struct kfd_dev *dev); 558 void device_queue_manager_uninit(struct device_queue_manager *dqm); 559 struct kernel_queue *kernel_queue_init(struct kfd_dev *dev, 560 enum kfd_queue_type type); 561 void kernel_queue_uninit(struct kernel_queue *kq); 562 563 /* Process Queue Manager */ 564 struct process_queue_node { 565 struct queue *q; 566 struct kernel_queue *kq; 567 struct list_head process_queue_list; 568 }; 569 570 int pqm_init(struct process_queue_manager *pqm, struct kfd_process *p); 571 void pqm_uninit(struct process_queue_manager *pqm); 572 int pqm_create_queue(struct process_queue_manager *pqm, 573 struct kfd_dev *dev, 574 struct file *f, 575 struct queue_properties *properties, 576 unsigned int flags, 577 enum kfd_queue_type type, 578 unsigned int *qid); 579 int pqm_destroy_queue(struct process_queue_manager *pqm, unsigned int qid); 580 int pqm_update_queue(struct process_queue_manager *pqm, unsigned int qid, 581 struct queue_properties *p); 582 583 /* Packet Manager */ 584 585 #define KFD_HIQ_TIMEOUT (500) 586 587 #define KFD_FENCE_COMPLETED (100) 588 #define KFD_FENCE_INIT (10) 589 #define KFD_UNMAP_LATENCY (150) 590 591 struct packet_manager { 592 struct device_queue_manager *dqm; 593 struct kernel_queue *priv_queue; 594 struct mutex lock; 595 bool allocated; 596 struct kfd_mem_obj *ib_buffer_obj; 597 }; 598 599 int pm_init(struct packet_manager *pm, struct device_queue_manager *dqm); 600 void pm_uninit(struct packet_manager *pm); 601 int pm_send_set_resources(struct packet_manager *pm, 602 struct scheduling_resources *res); 603 int pm_send_runlist(struct packet_manager *pm, struct list_head *dqm_queues); 604 int pm_send_query_status(struct packet_manager *pm, uint64_t fence_address, 605 uint32_t fence_value); 606 607 int pm_send_unmap_queue(struct packet_manager *pm, enum kfd_queue_type type, 608 enum kfd_preempt_type_filter mode, 609 uint32_t filter_param, bool reset, 610 unsigned int sdma_engine); 611 612 void pm_release_ib(struct packet_manager *pm); 613 614 uint64_t kfd_get_number_elems(struct kfd_dev *kfd); 615 phys_addr_t kfd_get_process_doorbells(struct kfd_dev *dev, 616 struct kfd_process *process); 617 618 #endif 619