1 /* 2 * Copyright 2014 Advanced Micro Devices, Inc. 3 * 4 * Permission is hereby granted, free of charge, to any person obtaining a 5 * copy of this software and associated documentation files (the "Software"), 6 * to deal in the Software without restriction, including without limitation 7 * the rights to use, copy, modify, merge, publish, distribute, sublicense, 8 * and/or sell copies of the Software, and to permit persons to whom the 9 * Software is furnished to do so, subject to the following conditions: 10 * 11 * The above copyright notice and this permission notice shall be included in 12 * all copies or substantial portions of the Software. 13 * 14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 15 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 17 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR 18 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, 19 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR 20 * OTHER DEALINGS IN THE SOFTWARE. 21 */ 22 23 #ifndef KFD_PRIV_H_INCLUDED 24 #define KFD_PRIV_H_INCLUDED 25 26 #include <linux/hashtable.h> 27 #include <linux/mmu_notifier.h> 28 #include <linux/mutex.h> 29 #include <linux/types.h> 30 #include <linux/atomic.h> 31 #include <linux/workqueue.h> 32 #include <linux/spinlock.h> 33 #include <linux/kfd_ioctl.h> 34 #include <kgd_kfd_interface.h> 35 36 #define KFD_SYSFS_FILE_MODE 0444 37 38 /* 39 * When working with cp scheduler we should assign the HIQ manually or via 40 * the radeon driver to a fixed hqd slot, here are the fixed HIQ hqd slot 41 * definitions for Kaveri. In Kaveri only the first ME queues participates 42 * in the cp scheduling taking that in mind we set the HIQ slot in the 43 * second ME. 44 */ 45 #define KFD_CIK_HIQ_PIPE 4 46 #define KFD_CIK_HIQ_QUEUE 0 47 48 /* GPU ID hash width in bits */ 49 #define KFD_GPU_ID_HASH_WIDTH 16 50 51 /* Macro for allocating structures */ 52 #define kfd_alloc_struct(ptr_to_struct) \ 53 ((typeof(ptr_to_struct)) kzalloc(sizeof(*ptr_to_struct), GFP_KERNEL)) 54 55 /* Kernel module parameter to specify maximum number of supported processes */ 56 extern int max_num_of_processes; 57 58 #define KFD_MAX_NUM_OF_PROCESSES_DEFAULT 32 59 #define KFD_MAX_NUM_OF_PROCESSES 512 60 61 /* 62 * Kernel module parameter to specify maximum number of supported queues 63 * per process 64 */ 65 extern int max_num_of_queues_per_process; 66 67 #define KFD_MAX_NUM_OF_QUEUES_PER_PROCESS_DEFAULT 128 68 #define KFD_MAX_NUM_OF_QUEUES_PER_PROCESS 1024 69 70 #define KFD_KERNEL_QUEUE_SIZE 2048 71 72 /* Kernel module parameter to specify the scheduling policy */ 73 extern int sched_policy; 74 75 /** 76 * enum kfd_sched_policy 77 * 78 * @KFD_SCHED_POLICY_HWS: H/W scheduling policy known as command processor (cp) 79 * scheduling. In this scheduling mode we're using the firmware code to 80 * schedule the user mode queues and kernel queues such as HIQ and DIQ. 81 * the HIQ queue is used as a special queue that dispatches the configuration 82 * to the cp and the user mode queues list that are currently running. 83 * the DIQ queue is a debugging queue that dispatches debugging commands to the 84 * firmware. 85 * in this scheduling mode user mode queues over subscription feature is 86 * enabled. 87 * 88 * @KFD_SCHED_POLICY_HWS_NO_OVERSUBSCRIPTION: The same as above but the over 89 * subscription feature disabled. 90 * 91 * @KFD_SCHED_POLICY_NO_HWS: no H/W scheduling policy is a mode which directly 92 * set the command processor registers and sets the queues "manually". This 93 * mode is used *ONLY* for debugging proposes. 94 * 95 */ 96 enum kfd_sched_policy { 97 KFD_SCHED_POLICY_HWS = 0, 98 KFD_SCHED_POLICY_HWS_NO_OVERSUBSCRIPTION, 99 KFD_SCHED_POLICY_NO_HWS 100 }; 101 102 enum cache_policy { 103 cache_policy_coherent, 104 cache_policy_noncoherent 105 }; 106 107 struct kfd_device_info { 108 unsigned int max_pasid_bits; 109 size_t ih_ring_entry_size; 110 uint16_t mqd_size_aligned; 111 }; 112 113 struct kfd_dev { 114 struct kgd_dev *kgd; 115 116 const struct kfd_device_info *device_info; 117 struct pci_dev *pdev; 118 119 unsigned int id; /* topology stub index */ 120 121 phys_addr_t doorbell_base; /* Start of actual doorbells used by 122 * KFD. It is aligned for mapping 123 * into user mode 124 */ 125 size_t doorbell_id_offset; /* Doorbell offset (from KFD doorbell 126 * to HW doorbell, GFX reserved some 127 * at the start) 128 */ 129 size_t doorbell_process_limit; /* Number of processes we have doorbell 130 * space for. 131 */ 132 u32 __iomem *doorbell_kernel_ptr; /* This is a pointer for a doorbells 133 * page used by kernel queue 134 */ 135 136 struct kgd2kfd_shared_resources shared_resources; 137 138 /* QCM Device instance */ 139 struct device_queue_manager *dqm; 140 141 bool init_complete; 142 }; 143 144 /* KGD2KFD callbacks */ 145 void kgd2kfd_exit(void); 146 struct kfd_dev *kgd2kfd_probe(struct kgd_dev *kgd, struct pci_dev *pdev); 147 bool kgd2kfd_device_init(struct kfd_dev *kfd, 148 const struct kgd2kfd_shared_resources *gpu_resources); 149 void kgd2kfd_device_exit(struct kfd_dev *kfd); 150 151 extern const struct kfd2kgd_calls *kfd2kgd; 152 153 struct kfd_mem_obj { 154 void *bo; 155 uint64_t gpu_addr; 156 uint32_t *cpu_ptr; 157 }; 158 159 enum kfd_mempool { 160 KFD_MEMPOOL_SYSTEM_CACHEABLE = 1, 161 KFD_MEMPOOL_SYSTEM_WRITECOMBINE = 2, 162 KFD_MEMPOOL_FRAMEBUFFER = 3, 163 }; 164 165 /* Character device interface */ 166 int kfd_chardev_init(void); 167 void kfd_chardev_exit(void); 168 struct device *kfd_chardev(void); 169 170 /** 171 * enum kfd_preempt_type_filter 172 * 173 * @KFD_PREEMPT_TYPE_FILTER_SINGLE_QUEUE: Preempts single queue. 174 * 175 * @KFD_PRERMPT_TYPE_FILTER_ALL_QUEUES: Preempts all queues in the 176 * running queues list. 177 * 178 * @KFD_PRERMPT_TYPE_FILTER_BY_PASID: Preempts queues that belongs to 179 * specific process. 180 * 181 */ 182 enum kfd_preempt_type_filter { 183 KFD_PREEMPT_TYPE_FILTER_SINGLE_QUEUE, 184 KFD_PREEMPT_TYPE_FILTER_ALL_QUEUES, 185 KFD_PREEMPT_TYPE_FILTER_BY_PASID 186 }; 187 188 enum kfd_preempt_type { 189 KFD_PREEMPT_TYPE_WAVEFRONT, 190 KFD_PREEMPT_TYPE_WAVEFRONT_RESET 191 }; 192 193 /** 194 * enum kfd_queue_type 195 * 196 * @KFD_QUEUE_TYPE_COMPUTE: Regular user mode queue type. 197 * 198 * @KFD_QUEUE_TYPE_SDMA: Sdma user mode queue type. 199 * 200 * @KFD_QUEUE_TYPE_HIQ: HIQ queue type. 201 * 202 * @KFD_QUEUE_TYPE_DIQ: DIQ queue type. 203 */ 204 enum kfd_queue_type { 205 KFD_QUEUE_TYPE_COMPUTE, 206 KFD_QUEUE_TYPE_SDMA, 207 KFD_QUEUE_TYPE_HIQ, 208 KFD_QUEUE_TYPE_DIQ 209 }; 210 211 enum kfd_queue_format { 212 KFD_QUEUE_FORMAT_PM4, 213 KFD_QUEUE_FORMAT_AQL 214 }; 215 216 /** 217 * struct queue_properties 218 * 219 * @type: The queue type. 220 * 221 * @queue_id: Queue identifier. 222 * 223 * @queue_address: Queue ring buffer address. 224 * 225 * @queue_size: Queue ring buffer size. 226 * 227 * @priority: Defines the queue priority relative to other queues in the 228 * process. 229 * This is just an indication and HW scheduling may override the priority as 230 * necessary while keeping the relative prioritization. 231 * the priority granularity is from 0 to f which f is the highest priority. 232 * currently all queues are initialized with the highest priority. 233 * 234 * @queue_percent: This field is partially implemented and currently a zero in 235 * this field defines that the queue is non active. 236 * 237 * @read_ptr: User space address which points to the number of dwords the 238 * cp read from the ring buffer. This field updates automatically by the H/W. 239 * 240 * @write_ptr: Defines the number of dwords written to the ring buffer. 241 * 242 * @doorbell_ptr: This field aim is to notify the H/W of new packet written to 243 * the queue ring buffer. This field should be similar to write_ptr and the user 244 * should update this field after he updated the write_ptr. 245 * 246 * @doorbell_off: The doorbell offset in the doorbell pci-bar. 247 * 248 * @is_interop: Defines if this is a interop queue. Interop queue means that the 249 * queue can access both graphics and compute resources. 250 * 251 * @is_active: Defines if the queue is active or not. 252 * 253 * @vmid: If the scheduling mode is no cp scheduling the field defines the vmid 254 * of the queue. 255 * 256 * This structure represents the queue properties for each queue no matter if 257 * it's user mode or kernel mode queue. 258 * 259 */ 260 struct queue_properties { 261 enum kfd_queue_type type; 262 enum kfd_queue_format format; 263 unsigned int queue_id; 264 uint64_t queue_address; 265 uint64_t queue_size; 266 uint32_t priority; 267 uint32_t queue_percent; 268 uint32_t *read_ptr; 269 uint32_t *write_ptr; 270 uint32_t __iomem *doorbell_ptr; 271 uint32_t doorbell_off; 272 bool is_interop; 273 bool is_active; 274 /* Not relevant for user mode queues in cp scheduling */ 275 unsigned int vmid; 276 }; 277 278 /** 279 * struct queue 280 * 281 * @list: Queue linked list. 282 * 283 * @mqd: The queue MQD. 284 * 285 * @mqd_mem_obj: The MQD local gpu memory object. 286 * 287 * @gart_mqd_addr: The MQD gart mc address. 288 * 289 * @properties: The queue properties. 290 * 291 * @mec: Used only in no cp scheduling mode and identifies to micro engine id 292 * that the queue should be execute on. 293 * 294 * @pipe: Used only in no cp scheduling mode and identifies the queue's pipe id. 295 * 296 * @queue: Used only in no cp scheduliong mode and identifies the queue's slot. 297 * 298 * @process: The kfd process that created this queue. 299 * 300 * @device: The kfd device that created this queue. 301 * 302 * This structure represents user mode compute queues. 303 * It contains all the necessary data to handle such queues. 304 * 305 */ 306 307 struct queue { 308 struct list_head list; 309 void *mqd; 310 struct kfd_mem_obj *mqd_mem_obj; 311 uint64_t gart_mqd_addr; 312 struct queue_properties properties; 313 314 uint32_t mec; 315 uint32_t pipe; 316 uint32_t queue; 317 318 struct kfd_process *process; 319 struct kfd_dev *device; 320 }; 321 322 /* 323 * Please read the kfd_mqd_manager.h description. 324 */ 325 enum KFD_MQD_TYPE { 326 KFD_MQD_TYPE_CIK_COMPUTE = 0, /* for no cp scheduling */ 327 KFD_MQD_TYPE_CIK_HIQ, /* for hiq */ 328 KFD_MQD_TYPE_CIK_CP, /* for cp queues and diq */ 329 KFD_MQD_TYPE_CIK_SDMA, /* for sdma queues */ 330 KFD_MQD_TYPE_MAX 331 }; 332 333 struct scheduling_resources { 334 unsigned int vmid_mask; 335 enum kfd_queue_type type; 336 uint64_t queue_mask; 337 uint64_t gws_mask; 338 uint32_t oac_mask; 339 uint32_t gds_heap_base; 340 uint32_t gds_heap_size; 341 }; 342 343 struct process_queue_manager { 344 /* data */ 345 struct kfd_process *process; 346 unsigned int num_concurrent_processes; 347 struct list_head queues; 348 unsigned long *queue_slot_bitmap; 349 }; 350 351 struct qcm_process_device { 352 /* The Device Queue Manager that owns this data */ 353 struct device_queue_manager *dqm; 354 struct process_queue_manager *pqm; 355 /* Device Queue Manager lock */ 356 struct mutex *lock; 357 /* Queues list */ 358 struct list_head queues_list; 359 struct list_head priv_queue_list; 360 361 unsigned int queue_count; 362 unsigned int vmid; 363 bool is_debug; 364 /* 365 * All the memory management data should be here too 366 */ 367 uint64_t gds_context_area; 368 uint32_t sh_mem_config; 369 uint32_t sh_mem_bases; 370 uint32_t sh_mem_ape1_base; 371 uint32_t sh_mem_ape1_limit; 372 uint32_t page_table_base; 373 uint32_t gds_size; 374 uint32_t num_gws; 375 uint32_t num_oac; 376 }; 377 378 /* Data that is per-process-per device. */ 379 struct kfd_process_device { 380 /* 381 * List of all per-device data for a process. 382 * Starts from kfd_process.per_device_data. 383 */ 384 struct list_head per_device_list; 385 386 /* The device that owns this data. */ 387 struct kfd_dev *dev; 388 389 390 /* per-process-per device QCM data structure */ 391 struct qcm_process_device qpd; 392 393 /*Apertures*/ 394 uint64_t lds_base; 395 uint64_t lds_limit; 396 uint64_t gpuvm_base; 397 uint64_t gpuvm_limit; 398 uint64_t scratch_base; 399 uint64_t scratch_limit; 400 401 /* Is this process/pasid bound to this device? (amd_iommu_bind_pasid) */ 402 bool bound; 403 }; 404 405 #define qpd_to_pdd(x) container_of(x, struct kfd_process_device, qpd) 406 407 /* Process data */ 408 struct kfd_process { 409 /* 410 * kfd_process are stored in an mm_struct*->kfd_process* 411 * hash table (kfd_processes in kfd_process.c) 412 */ 413 struct hlist_node kfd_processes; 414 415 struct mm_struct *mm; 416 417 struct mutex mutex; 418 419 /* 420 * In any process, the thread that started main() is the lead 421 * thread and outlives the rest. 422 * It is here because amd_iommu_bind_pasid wants a task_struct. 423 */ 424 struct task_struct *lead_thread; 425 426 /* We want to receive a notification when the mm_struct is destroyed */ 427 struct mmu_notifier mmu_notifier; 428 429 /* Use for delayed freeing of kfd_process structure */ 430 struct rcu_head rcu; 431 432 unsigned int pasid; 433 434 /* 435 * List of kfd_process_device structures, 436 * one for each device the process is using. 437 */ 438 struct list_head per_device_data; 439 440 struct process_queue_manager pqm; 441 442 /* The process's queues. */ 443 size_t queue_array_size; 444 445 /* Size is queue_array_size, up to MAX_PROCESS_QUEUES. */ 446 struct kfd_queue **queues; 447 448 unsigned long allocated_queue_bitmap[DIV_ROUND_UP(KFD_MAX_NUM_OF_QUEUES_PER_PROCESS, BITS_PER_LONG)]; 449 450 /*Is the user space process 32 bit?*/ 451 bool is_32bit_user_mode; 452 }; 453 454 /** 455 * Ioctl function type. 456 * 457 * \param filep pointer to file structure. 458 * \param p amdkfd process pointer. 459 * \param data pointer to arg that was copied from user. 460 */ 461 typedef int amdkfd_ioctl_t(struct file *filep, struct kfd_process *p, 462 void *data); 463 464 struct amdkfd_ioctl_desc { 465 unsigned int cmd; 466 int flags; 467 amdkfd_ioctl_t *func; 468 unsigned int cmd_drv; 469 const char *name; 470 }; 471 472 void kfd_process_create_wq(void); 473 void kfd_process_destroy_wq(void); 474 struct kfd_process *kfd_create_process(const struct task_struct *); 475 struct kfd_process *kfd_get_process(const struct task_struct *); 476 477 struct kfd_process_device *kfd_bind_process_to_device(struct kfd_dev *dev, 478 struct kfd_process *p); 479 void kfd_unbind_process_from_device(struct kfd_dev *dev, unsigned int pasid); 480 struct kfd_process_device *kfd_get_process_device_data(struct kfd_dev *dev, 481 struct kfd_process *p, 482 int create_pdd); 483 484 /* Process device data iterator */ 485 struct kfd_process_device *kfd_get_first_process_device_data(struct kfd_process *p); 486 struct kfd_process_device *kfd_get_next_process_device_data(struct kfd_process *p, 487 struct kfd_process_device *pdd); 488 bool kfd_has_process_device_data(struct kfd_process *p); 489 490 /* PASIDs */ 491 int kfd_pasid_init(void); 492 void kfd_pasid_exit(void); 493 bool kfd_set_pasid_limit(unsigned int new_limit); 494 unsigned int kfd_get_pasid_limit(void); 495 unsigned int kfd_pasid_alloc(void); 496 void kfd_pasid_free(unsigned int pasid); 497 498 /* Doorbells */ 499 void kfd_doorbell_init(struct kfd_dev *kfd); 500 int kfd_doorbell_mmap(struct kfd_process *process, struct vm_area_struct *vma); 501 u32 __iomem *kfd_get_kernel_doorbell(struct kfd_dev *kfd, 502 unsigned int *doorbell_off); 503 void kfd_release_kernel_doorbell(struct kfd_dev *kfd, u32 __iomem *db_addr); 504 u32 read_kernel_doorbell(u32 __iomem *db); 505 void write_kernel_doorbell(u32 __iomem *db, u32 value); 506 unsigned int kfd_queue_id_to_doorbell(struct kfd_dev *kfd, 507 struct kfd_process *process, 508 unsigned int queue_id); 509 510 extern struct device *kfd_device; 511 512 /* Topology */ 513 int kfd_topology_init(void); 514 void kfd_topology_shutdown(void); 515 int kfd_topology_add_device(struct kfd_dev *gpu); 516 int kfd_topology_remove_device(struct kfd_dev *gpu); 517 struct kfd_dev *kfd_device_by_id(uint32_t gpu_id); 518 struct kfd_dev *kfd_device_by_pci_dev(const struct pci_dev *pdev); 519 struct kfd_dev *kfd_topology_enum_kfd_devices(uint8_t idx); 520 521 /* Interrupts */ 522 void kgd2kfd_interrupt(struct kfd_dev *kfd, const void *ih_ring_entry); 523 524 /* Power Management */ 525 void kgd2kfd_suspend(struct kfd_dev *kfd); 526 int kgd2kfd_resume(struct kfd_dev *kfd); 527 528 /* amdkfd Apertures */ 529 int kfd_init_apertures(struct kfd_process *process); 530 531 /* Queue Context Management */ 532 inline uint32_t lower_32(uint64_t x); 533 inline uint32_t upper_32(uint64_t x); 534 535 int init_queue(struct queue **q, struct queue_properties properties); 536 void uninit_queue(struct queue *q); 537 void print_queue_properties(struct queue_properties *q); 538 void print_queue(struct queue *q); 539 540 struct mqd_manager *mqd_manager_init(enum KFD_MQD_TYPE type, 541 struct kfd_dev *dev); 542 struct device_queue_manager *device_queue_manager_init(struct kfd_dev *dev); 543 void device_queue_manager_uninit(struct device_queue_manager *dqm); 544 struct kernel_queue *kernel_queue_init(struct kfd_dev *dev, 545 enum kfd_queue_type type); 546 void kernel_queue_uninit(struct kernel_queue *kq); 547 548 /* Process Queue Manager */ 549 struct process_queue_node { 550 struct queue *q; 551 struct kernel_queue *kq; 552 struct list_head process_queue_list; 553 }; 554 555 int pqm_init(struct process_queue_manager *pqm, struct kfd_process *p); 556 void pqm_uninit(struct process_queue_manager *pqm); 557 int pqm_create_queue(struct process_queue_manager *pqm, 558 struct kfd_dev *dev, 559 struct file *f, 560 struct queue_properties *properties, 561 unsigned int flags, 562 enum kfd_queue_type type, 563 unsigned int *qid); 564 int pqm_destroy_queue(struct process_queue_manager *pqm, unsigned int qid); 565 int pqm_update_queue(struct process_queue_manager *pqm, unsigned int qid, 566 struct queue_properties *p); 567 568 /* Packet Manager */ 569 570 #define KFD_HIQ_TIMEOUT (500) 571 572 #define KFD_FENCE_COMPLETED (100) 573 #define KFD_FENCE_INIT (10) 574 #define KFD_UNMAP_LATENCY (150) 575 576 struct packet_manager { 577 struct device_queue_manager *dqm; 578 struct kernel_queue *priv_queue; 579 struct mutex lock; 580 bool allocated; 581 struct kfd_mem_obj *ib_buffer_obj; 582 }; 583 584 int pm_init(struct packet_manager *pm, struct device_queue_manager *dqm); 585 void pm_uninit(struct packet_manager *pm); 586 int pm_send_set_resources(struct packet_manager *pm, 587 struct scheduling_resources *res); 588 int pm_send_runlist(struct packet_manager *pm, struct list_head *dqm_queues); 589 int pm_send_query_status(struct packet_manager *pm, uint64_t fence_address, 590 uint32_t fence_value); 591 592 int pm_send_unmap_queue(struct packet_manager *pm, enum kfd_queue_type type, 593 enum kfd_preempt_type_filter mode, 594 uint32_t filter_param, bool reset, 595 unsigned int sdma_engine); 596 597 void pm_release_ib(struct packet_manager *pm); 598 599 uint64_t kfd_get_number_elems(struct kfd_dev *kfd); 600 phys_addr_t kfd_get_process_doorbells(struct kfd_dev *dev, 601 struct kfd_process *process); 602 603 #endif 604