/*
 * Copyright 2014 Advanced Micro Devices, Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 */

#ifndef KFD_PRIV_H_INCLUDED
#define KFD_PRIV_H_INCLUDED

#include <linux/hashtable.h>
#include <linux/mmu_notifier.h>
#include <linux/mutex.h>
#include <linux/types.h>
#include <linux/atomic.h>
#include <linux/workqueue.h>
#include <linux/spinlock.h>
#include <linux/kfd_ioctl.h>
#include <linux/idr.h>
#include <linux/kfifo.h>
#include <linux/seq_file.h>
#include <linux/kref.h>
#include <kgd_kfd_interface.h>

#include "amd_shared.h"

#define KFD_SYSFS_FILE_MODE 0444

#define KFD_MMAP_DOORBELL_MASK 0x8000000000000ull
#define KFD_MMAP_EVENTS_MASK 0x4000000000000ull
#define KFD_MMAP_RESERVED_MEM_MASK 0x2000000000000ull
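
/*
 * These masks are encoded in the high bits of the mmap offset so that
 * kfd_mmap() can tell which kind of mapping user mode requested. A dispatch
 * sketch (illustrative only; the real logic lives in kfd_chardev.c):
 *
 *	if ((vma->vm_pgoff & KFD_MMAP_DOORBELL_MASK) ==
 *			KFD_MMAP_DOORBELL_MASK) {
 *		vma->vm_pgoff ^= KFD_MMAP_DOORBELL_MASK;
 *		return kfd_doorbell_mmap(process, vma);
 *	}
 */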

/*
 * When working with the cp scheduler, the HIQ must be assigned to a fixed
 * HQD slot, either manually or via the radeon driver. These are the fixed
 * HIQ HQD slot definitions for Kaveri. On Kaveri only the queues of the
 * first ME participate in cp scheduling; with that in mind, the HIQ slot is
 * placed in the second ME.
 */
#define KFD_CIK_HIQ_PIPE 4
#define KFD_CIK_HIQ_QUEUE 0

/* GPU ID hash width in bits */
#define KFD_GPU_ID_HASH_WIDTH 16

/* Macro for allocating structures */
#define kfd_alloc_struct(ptr_to_struct)	\
	((typeof(ptr_to_struct)) kzalloc(sizeof(*ptr_to_struct), GFP_KERNEL))
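
/*
 * Usage sketch (illustrative): the argument is the pointer variable itself,
 * so the allocation size and the cast are both derived from its type:
 *
 *	struct kfd_dev *kfd;
 *
 *	kfd = kfd_alloc_struct(kfd);
 *	if (!kfd)
 *		return NULL;
 */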

#define KFD_MAX_NUM_OF_PROCESSES 512
#define KFD_MAX_NUM_OF_QUEUES_PER_PROCESS 1024

/*
 * Size of the per-process TBA+TMA buffer: 2 pages
 *
 * The first page is the TBA used for the CWSR ISA code. The second
 * page is used as TMA for daisy-chaining a user-mode trap handler.
 */
#define KFD_CWSR_TBA_TMA_SIZE (PAGE_SIZE * 2)
#define KFD_CWSR_TMA_OFFSET PAGE_SIZE
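
/*
 * Sketch (illustrative): with the buffer mapped at tba_addr, the TMA
 * address is derived by adding the fixed offset, as done during process
 * initialization:
 *
 *	qpd->tma_addr = qpd->tba_addr + KFD_CWSR_TMA_OFFSET;
 */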

/*
 * Kernel module parameter to specify the maximum number of supported queues
 * per device
 */
extern int max_num_of_queues_per_device;

#define KFD_MAX_NUM_OF_QUEUES_PER_DEVICE_DEFAULT 4096
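/* Upper bound: 512 processes * 1024 queues each = 524,288 queues */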
#define KFD_MAX_NUM_OF_QUEUES_PER_DEVICE		\
	(KFD_MAX_NUM_OF_PROCESSES *			\
			KFD_MAX_NUM_OF_QUEUES_PER_PROCESS)

#define KFD_KERNEL_QUEUE_SIZE 2048

/* Kernel module parameter to specify the scheduling policy */
extern int sched_policy;

/*
 * Kernel module parameter to specify the maximum number of processes per
 * HW scheduler
 */
extern int hws_max_conc_proc;

extern int cwsr_enable;

/*
 * Kernel module parameter to specify whether to send SIGTERM to an HSA
 * process on an unhandled exception
 */
extern int send_sigterm;

/*
 * Kernel module parameter used to simulate a large-BAR machine on machines
 * where large BAR is not enabled
 */
extern int debug_largebar;

/*
 * Ignore CRAT table during KFD initialization; can be used to work around
 * broken CRAT tables on some AMD systems
 */
extern int ignore_crat;

/**
 * enum kfd_sched_policy
 *
 * @KFD_SCHED_POLICY_HWS: H/W scheduling policy known as command processor
 * (cp) scheduling. In this scheduling mode the firmware schedules the user
 * mode queues and the kernel queues such as HIQ and DIQ.
 * The HIQ is a special queue that dispatches to the cp the configuration and
 * the list of user mode queues currently running.
 * The DIQ is a debugging queue that dispatches debugging commands to the
 * firmware.
 * In this scheduling mode, oversubscription of user mode queues is enabled.
 *
 * @KFD_SCHED_POLICY_HWS_NO_OVERSUBSCRIPTION: The same as above but with the
 * oversubscription feature disabled.
 *
 * @KFD_SCHED_POLICY_NO_HWS: No H/W scheduling. In this mode the driver sets
 * the command processor registers and configures the queues "manually".
 * This mode is used *ONLY* for debugging purposes.
 */
enum kfd_sched_policy {
	KFD_SCHED_POLICY_HWS = 0,
	KFD_SCHED_POLICY_HWS_NO_OVERSUBSCRIPTION,
	KFD_SCHED_POLICY_NO_HWS
};

enum cache_policy {
	cache_policy_coherent,
	cache_policy_noncoherent
};

struct kfd_event_interrupt_class {
	bool (*interrupt_isr)(struct kfd_dev *dev,
				const uint32_t *ih_ring_entry);
	void (*interrupt_wq)(struct kfd_dev *dev,
				const uint32_t *ih_ring_entry);
};

struct kfd_device_info {
	enum amd_asic_type asic_family;
	const struct kfd_event_interrupt_class *event_interrupt_class;
	unsigned int max_pasid_bits;
	unsigned int max_no_of_hqd;
	size_t ih_ring_entry_size;
	uint8_t num_of_watch_points;
	uint16_t mqd_size_aligned;
	bool supports_cwsr;
	bool needs_iommu_device;
	bool needs_pci_atomics;
};

struct kfd_mem_obj {
	uint32_t range_start;
	uint32_t range_end;
	uint64_t gpu_addr;
	uint32_t *cpu_ptr;
};

struct kfd_vmid_info {
	uint32_t first_vmid_kfd;
	uint32_t last_vmid_kfd;
	uint32_t vmid_num_kfd;
};

struct kfd_dev {
	struct kgd_dev *kgd;

	const struct kfd_device_info *device_info;
	struct pci_dev *pdev;

	unsigned int id;		/* topology stub index */

	phys_addr_t doorbell_base;	/* Start of actual doorbells used by
					 * KFD. It is aligned for mapping
					 * into user mode
					 */
	size_t doorbell_id_offset;	/* Doorbell offset (from KFD doorbell
					 * to HW doorbell, GFX reserved some
					 * at the start)
					 */
	u32 __iomem *doorbell_kernel_ptr; /* This is a pointer to the doorbell
					   * page used by the kernel queue
					   */

	struct kgd2kfd_shared_resources shared_resources;
	struct kfd_vmid_info vm_info;

	const struct kfd2kgd_calls *kfd2kgd;
	struct mutex doorbell_mutex;
	DECLARE_BITMAP(doorbell_available_index,
			KFD_MAX_NUM_OF_QUEUES_PER_PROCESS);

	void *gtt_mem;
	uint64_t gtt_start_gpu_addr;
	void *gtt_start_cpu_ptr;
	void *gtt_sa_bitmap;
	struct mutex gtt_sa_lock;
	unsigned int gtt_sa_chunk_size;
	unsigned int gtt_sa_num_of_chunks;

	/* Interrupts */
	struct kfifo ih_fifo;
	struct workqueue_struct *ih_wq;
	struct work_struct interrupt_work;
	spinlock_t interrupt_lock;

	/* QCM Device instance */
	struct device_queue_manager *dqm;

	bool init_complete;
	/*
	 * Interrupts of interest to KFD are copied
	 * from the HW ring into a SW ring.
	 */
	bool interrupts_active;

	/* Debug manager */
	struct kfd_dbgmgr           *dbgmgr;

	/* Maximum number of processes mapped to the HW scheduler */
	unsigned int max_proc_per_quantum;

	/* CWSR */
	bool cwsr_enabled;
	const void *cwsr_isa;
	unsigned int cwsr_isa_size;
};

/* KGD2KFD callbacks */
void kgd2kfd_exit(void);
struct kfd_dev *kgd2kfd_probe(struct kgd_dev *kgd,
			struct pci_dev *pdev, const struct kfd2kgd_calls *f2g);
bool kgd2kfd_device_init(struct kfd_dev *kfd,
			const struct kgd2kfd_shared_resources *gpu_resources);
void kgd2kfd_device_exit(struct kfd_dev *kfd);

enum kfd_mempool {
	KFD_MEMPOOL_SYSTEM_CACHEABLE = 1,
	KFD_MEMPOOL_SYSTEM_WRITECOMBINE = 2,
	KFD_MEMPOOL_FRAMEBUFFER = 3,
};

/* Character device interface */
int kfd_chardev_init(void);
void kfd_chardev_exit(void);
struct device *kfd_chardev(void);

/**
 * enum kfd_unmap_queues_filter
 *
 * @KFD_UNMAP_QUEUES_FILTER_SINGLE_QUEUE: Preempts a single queue.
 *
 * @KFD_UNMAP_QUEUES_FILTER_ALL_QUEUES: Preempts all queues in the
 *						running queues list.
 *
 * @KFD_UNMAP_QUEUES_FILTER_DYNAMIC_QUEUES: Preempts all non-static queues.
 *
 * @KFD_UNMAP_QUEUES_FILTER_BY_PASID: Preempts queues that belong to
 *						the specified process.
 */
enum kfd_unmap_queues_filter {
	KFD_UNMAP_QUEUES_FILTER_SINGLE_QUEUE,
	KFD_UNMAP_QUEUES_FILTER_ALL_QUEUES,
	KFD_UNMAP_QUEUES_FILTER_DYNAMIC_QUEUES,
	KFD_UNMAP_QUEUES_FILTER_BY_PASID
};

/**
 * enum kfd_queue_type
 *
 * @KFD_QUEUE_TYPE_COMPUTE: Regular user mode queue type.
 *
 * @KFD_QUEUE_TYPE_SDMA: SDMA user mode queue type.
 *
 * @KFD_QUEUE_TYPE_HIQ: HIQ queue type.
 *
 * @KFD_QUEUE_TYPE_DIQ: DIQ queue type.
 */
enum kfd_queue_type {
	KFD_QUEUE_TYPE_COMPUTE,
	KFD_QUEUE_TYPE_SDMA,
	KFD_QUEUE_TYPE_HIQ,
	KFD_QUEUE_TYPE_DIQ
};

enum kfd_queue_format {
	KFD_QUEUE_FORMAT_PM4,
	KFD_QUEUE_FORMAT_AQL
};

/**
 * struct queue_properties
 *
 * @type: The queue type.
 *
 * @queue_id: Queue identifier.
 *
 * @queue_address: Queue ring buffer address.
 *
 * @queue_size: Queue ring buffer size.
 *
 * @priority: Defines the queue priority relative to other queues in the
 * process.
 * This is just an indication and HW scheduling may override the priority as
 * necessary while keeping the relative prioritization.
 * The priority granularity ranges from 0 to f, where f is the highest
 * priority. Currently all queues are initialized with the highest priority.
 *
 * @queue_percent: This field is partially implemented and currently a zero
 * in this field marks the queue as inactive.
 *
 * @read_ptr: User space address which points to the number of dwords the
 * cp has read from the ring buffer. This field is updated automatically by
 * the H/W.
 *
 * @write_ptr: Defines the number of dwords written to the ring buffer.
 *
 * @doorbell_ptr: Notifies the H/W of new packets written to the queue ring
 * buffer. This field should hold a value similar to write_ptr, and user
 * mode should update it after updating write_ptr.
 *
 * @doorbell_off: The doorbell offset in the doorbell pci-bar.
 *
 * @is_interop: Defines if this is an interop queue. An interop queue can
 * access both graphics and compute resources.
 *
 * @is_evicted: Defines if the queue is evicted. Only active queues
 * are evicted, rendering them inactive.
 *
 * @is_active: Defines if the queue is active or not. @is_active and
 * @is_evicted are protected by the DQM lock.
 *
 * @vmid: If the scheduling mode is no cp scheduling, this field defines the
 * vmid of the queue.
 *
 * This structure represents the queue properties for each queue, whether it
 * is a user mode or a kernel mode queue.
 */
struct queue_properties {
	enum kfd_queue_type type;
	enum kfd_queue_format format;
	unsigned int queue_id;
	uint64_t queue_address;
	uint64_t queue_size;
	uint32_t priority;
	uint32_t queue_percent;
	uint32_t *read_ptr;
	uint32_t *write_ptr;
	uint32_t __iomem *doorbell_ptr;
	uint32_t doorbell_off;
	bool is_interop;
	bool is_evicted;
	bool is_active;
	/* Not relevant for user mode queues in cp scheduling */
	unsigned int vmid;
	/* Relevant only for sdma queues */
	uint32_t sdma_engine_id;
	uint32_t sdma_queue_id;
	uint32_t sdma_vm_addr;
	/* Relevant only for VI */
	uint64_t eop_ring_buffer_address;
	uint32_t eop_ring_buffer_size;
	uint64_t ctx_save_restore_area_address;
	uint32_t ctx_save_restore_area_size;
	uint32_t ctl_stack_size;
	uint64_t tba_addr;
	uint64_t tma_addr;
};

/**
 * struct queue
 *
 * @list: Queue linked list.
 *
 * @mqd: The queue MQD.
 *
 * @mqd_mem_obj: The MQD local gpu memory object.
 *
 * @gart_mqd_addr: The MQD gart mc address.
 *
 * @properties: The queue properties.
 *
 * @mec: Used only in no cp scheduling mode and identifies the micro engine
 *	 id that the queue should be executed on.
 *
 * @pipe: Used only in no cp scheduling mode and identifies the queue's pipe
 *	  id.
 *
 * @queue: Used only in no cp scheduling mode and identifies the queue's
 *	   slot.
 *
 * @process: The kfd process that created this queue.
 *
 * @device: The kfd device that created this queue.
 *
 * This structure represents user mode compute queues.
 * It contains all the necessary data to handle such queues.
 */
struct queue {
	struct list_head list;
	void *mqd;
	struct kfd_mem_obj *mqd_mem_obj;
	uint64_t gart_mqd_addr;
	struct queue_properties properties;

	uint32_t mec;
	uint32_t pipe;
	uint32_t queue;

	unsigned int sdma_id;

	struct kfd_process	*process;
	struct kfd_dev		*device;
};

/*
 * Please read the kfd_mqd_manager.h description.
 */
enum KFD_MQD_TYPE {
	KFD_MQD_TYPE_COMPUTE = 0,	/* for no cp scheduling */
	KFD_MQD_TYPE_HIQ,		/* for hiq */
	KFD_MQD_TYPE_CP,		/* for cp queues and diq */
	KFD_MQD_TYPE_SDMA,		/* for sdma queues */
	KFD_MQD_TYPE_MAX
};

struct scheduling_resources {
	unsigned int vmid_mask;
	enum kfd_queue_type type;
	uint64_t queue_mask;
	uint64_t gws_mask;
	uint32_t oac_mask;
	uint32_t gds_heap_base;
	uint32_t gds_heap_size;
};

struct process_queue_manager {
	/* data */
	struct kfd_process	*process;
	struct list_head	queues;
	unsigned long		*queue_slot_bitmap;
};

struct qcm_process_device {
	/* The Device Queue Manager that owns this data */
	struct device_queue_manager *dqm;
	struct process_queue_manager *pqm;
	/* Queues list */
	struct list_head queues_list;
	struct list_head priv_queue_list;

	unsigned int queue_count;
	unsigned int vmid;
	bool is_debug;
	unsigned int evicted; /* eviction counter, 0=active */

	/* This flag tells if we should reset all wavefronts on
	 * process termination
	 */
	bool reset_wavefronts;

	/*
	 * All the memory management data should be here too
	 */
	uint64_t gds_context_area;
	uint32_t sh_mem_config;
	uint32_t sh_mem_bases;
	uint32_t sh_mem_ape1_base;
	uint32_t sh_mem_ape1_limit;
	uint32_t page_table_base;
	uint32_t gds_size;
	uint32_t num_gws;
	uint32_t num_oac;
	uint32_t sh_hidden_private_base;

	/* CWSR memory */
	void *cwsr_kaddr;
	uint64_t cwsr_base;
	uint64_t tba_addr;
	uint64_t tma_addr;

	/* IB memory */
	uint64_t ib_base;
	void *ib_kaddr;
};

/* KFD Memory Eviction */

/* Approx. wait time before attempting to restore evicted BOs */
#define PROCESS_RESTORE_TIME_MS 100
/* Approx. back off time if restore fails due to lack of memory */
#define PROCESS_BACK_OFF_TIME_MS 100
/* Approx. time before evicting the process again */
#define PROCESS_ACTIVE_TIME_MS 10

int kgd2kfd_schedule_evict_and_restore_process(struct mm_struct *mm,
					       struct dma_fence *fence);

/* 8 byte handle containing GPU ID in the most significant 4 bytes and
 * idr_handle in the least significant 4 bytes
 */
#define MAKE_HANDLE(gpu_id, idr_handle) \
	(((uint64_t)(gpu_id) << 32) + (idr_handle))
#define GET_GPU_ID(handle) ((handle) >> 32)
#define GET_IDR_HANDLE(handle) ((handle) & 0xFFFFFFFF)
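
/*
 * Round-trip sketch (illustrative): pack a handle for user mode, then
 * unpack it again on a later ioctl:
 *
 *	uint64_t handle = MAKE_HANDLE(gpu_id, idr_handle);
 *
 *	WARN_ON(GET_GPU_ID(handle) != gpu_id);
 *	WARN_ON(GET_IDR_HANDLE(handle) != idr_handle);
 */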

enum kfd_pdd_bound {
	PDD_UNBOUND = 0,
	PDD_BOUND,
	PDD_BOUND_SUSPENDED,
};

/* Data that is per-process, per-device. */
struct kfd_process_device {
	/*
	 * List of all per-device data for a process.
	 * Starts from kfd_process.per_device_data.
	 */
	struct list_head per_device_list;

	/* The device that owns this data. */
	struct kfd_dev *dev;

	/* The process that owns this kfd_process_device. */
	struct kfd_process *process;

	/* per-process, per-device QCM data structure */
	struct qcm_process_device qpd;

	/* Apertures */
	uint64_t lds_base;
	uint64_t lds_limit;
	uint64_t gpuvm_base;
	uint64_t gpuvm_limit;
	uint64_t scratch_base;
	uint64_t scratch_limit;

	/* VM context for GPUVM allocations */
	struct file *drm_file;
	void *vm;

	/* GPUVM allocations storage */
	struct idr alloc_idr;

	/* Flag used to tell whether the pdd has been dequeued from the dqm.
	 * This is used to prevent dev->dqm->ops.process_termination() from
	 * being called twice when it is already called in the IOMMU callback
	 * function.
	 */
	bool already_dequeued;

	/* Is this process/pasid bound to this device? (amd_iommu_bind_pasid) */
	enum kfd_pdd_bound bound;
};

#define qpd_to_pdd(x) container_of(x, struct kfd_process_device, qpd)
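
/*
 * Example (illustrative): recover the enclosing kfd_process_device from a
 * qcm_process_device pointer handed around by the DQM code:
 *
 *	struct kfd_process_device *pdd = qpd_to_pdd(qpd);
 */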

/* Process data */
struct kfd_process {
	/*
	 * kfd_process are stored in an mm_struct*->kfd_process*
	 * hash table (kfd_processes in kfd_process.c)
	 */
	struct hlist_node kfd_processes;

	/*
	 * Opaque pointer to mm_struct. We don't hold a reference to
	 * it so it should never be dereferenced from here. This is
	 * only used for looking up processes by their mm.
	 */
	void *mm;

	struct kref ref;
	struct work_struct release_work;

	struct mutex mutex;

	/*
	 * In any process, the thread that started main() is the lead
	 * thread and outlives the rest.
	 * It is here because amd_iommu_bind_pasid wants a task_struct.
	 * It can also be used for safely getting a reference to the
	 * mm_struct of the process.
	 */
	struct task_struct *lead_thread;

	/* We want to receive a notification when the mm_struct is destroyed */
	struct mmu_notifier mmu_notifier;

	/* Used for delayed freeing of the kfd_process structure */
	struct rcu_head	rcu;

	unsigned int pasid;
	unsigned int doorbell_index;

	/*
	 * List of kfd_process_device structures,
	 * one for each device the process is using.
	 */
	struct list_head per_device_data;

	struct process_queue_manager pqm;

	/* Is the user space process 32 bit? */
	bool is_32bit_user_mode;

	/* Event-related data */
	struct mutex event_mutex;
	/* Event ID allocator and lookup */
	struct idr event_idr;
	/* Event page */
	struct kfd_signal_page *signal_page;
	size_t signal_mapped_size;
	size_t signal_event_count;
	bool signal_event_limit_reached;

	/* Information used for memory eviction */
	void *kgd_process_info;
	/* Eviction fence that is attached to all the BOs of this process. The
	 * fence will be triggered during eviction and a new one will be
	 * created during restore.
	 */
	struct dma_fence *ef;

	/* Work items for evicting and restoring BOs */
	struct delayed_work eviction_work;
	struct delayed_work restore_work;
	/* seqno of the last scheduled eviction */
	unsigned int last_eviction_seqno;
	/* Approx. the last timestamp (in jiffies) when the process was
	 * restored after an eviction
	 */
	unsigned long last_restore_timestamp;
};

#define KFD_PROCESS_TABLE_SIZE 5 /* bits: 32 entries */
extern DECLARE_HASHTABLE(kfd_processes_table, KFD_PROCESS_TABLE_SIZE);
extern struct srcu_struct kfd_processes_srcu;

/**
 * Ioctl function type.
 *
 * \param filep pointer to file structure.
 * \param p amdkfd process pointer.
 * \param data pointer to arg that was copied from user.
 */
typedef int amdkfd_ioctl_t(struct file *filep, struct kfd_process *p,
				void *data);
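
/*
 * A handler matching this type might look like the sketch below (the
 * handler name and args struct are hypothetical):
 *
 *	static int kfd_ioctl_foo(struct file *filep, struct kfd_process *p,
 *				 void *data)
 *	{
 *		struct kfd_ioctl_foo_args *args = data;
 *
 *		(validate args, take p->mutex, do the work, ...)
 *		return 0;
 *	}
 */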

struct amdkfd_ioctl_desc {
	unsigned int cmd;
	int flags;
	amdkfd_ioctl_t *func;
	unsigned int cmd_drv;
	const char *name;
};

int kfd_process_create_wq(void);
void kfd_process_destroy_wq(void);
struct kfd_process *kfd_create_process(struct file *filep);
struct kfd_process *kfd_get_process(const struct task_struct *);
struct kfd_process *kfd_lookup_process_by_pasid(unsigned int pasid);
struct kfd_process *kfd_lookup_process_by_mm(const struct mm_struct *mm);
void kfd_unref_process(struct kfd_process *p);
void kfd_suspend_all_processes(void);
int kfd_resume_all_processes(void);

int kfd_process_device_init_vm(struct kfd_process_device *pdd,
			       struct file *drm_file);
struct kfd_process_device *kfd_bind_process_to_device(struct kfd_dev *dev,
						struct kfd_process *p);
struct kfd_process_device *kfd_get_process_device_data(struct kfd_dev *dev,
							struct kfd_process *p);
struct kfd_process_device *kfd_create_process_device_data(struct kfd_dev *dev,
							struct kfd_process *p);

int kfd_reserved_mem_mmap(struct kfd_process *process,
			  struct vm_area_struct *vma);

/* KFD process API for creating and translating handles */
int kfd_process_device_create_obj_handle(struct kfd_process_device *pdd,
					void *mem);
void *kfd_process_device_translate_handle(struct kfd_process_device *p,
					int handle);
void kfd_process_device_remove_obj_handle(struct kfd_process_device *pdd,
					int handle);

/* Process device data iterator */
struct kfd_process_device *kfd_get_first_process_device_data(
							struct kfd_process *p);
struct kfd_process_device *kfd_get_next_process_device_data(
						struct kfd_process *p,
						struct kfd_process_device *pdd);
bool kfd_has_process_device_data(struct kfd_process *p);
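
/*
 * Iteration sketch (illustrative; do_something() is a placeholder):
 *
 *	struct kfd_process_device *pdd;
 *
 *	for (pdd = kfd_get_first_process_device_data(p); pdd;
 *	     pdd = kfd_get_next_process_device_data(p, pdd))
 *		do_something(pdd->dev);
 */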

/* PASIDs */
int kfd_pasid_init(void);
void kfd_pasid_exit(void);
bool kfd_set_pasid_limit(unsigned int new_limit);
unsigned int kfd_get_pasid_limit(void);
unsigned int kfd_pasid_alloc(void);
void kfd_pasid_free(unsigned int pasid);

/* Doorbells */
int kfd_doorbell_init(struct kfd_dev *kfd);
void kfd_doorbell_fini(struct kfd_dev *kfd);
int kfd_doorbell_mmap(struct kfd_process *process, struct vm_area_struct *vma);
u32 __iomem *kfd_get_kernel_doorbell(struct kfd_dev *kfd,
					unsigned int *doorbell_off);
void kfd_release_kernel_doorbell(struct kfd_dev *kfd, u32 __iomem *db_addr);
u32 read_kernel_doorbell(u32 __iomem *db);
void write_kernel_doorbell(u32 __iomem *db, u32 value);
unsigned int kfd_queue_id_to_doorbell(struct kfd_dev *kfd,
					struct kfd_process *process,
					unsigned int queue_id);
phys_addr_t kfd_get_process_doorbells(struct kfd_dev *dev,
					struct kfd_process *process);
int kfd_alloc_process_doorbells(struct kfd_process *process);
void kfd_free_process_doorbells(struct kfd_process *process);
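
/*
 * Kernel doorbell life cycle, as a sketch (illustrative):
 *
 *	unsigned int doorbell_off;
 *	u32 __iomem *db = kfd_get_kernel_doorbell(kfd, &doorbell_off);
 *
 *	if (db) {
 *		write_kernel_doorbell(db, new_wptr);	(ring the queue)
 *		kfd_release_kernel_doorbell(kfd, db);	(on teardown)
 *	}
 */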

/* GTT Sub-Allocator */

int kfd_gtt_sa_allocate(struct kfd_dev *kfd, unsigned int size,
			struct kfd_mem_obj **mem_obj);

int kfd_gtt_sa_free(struct kfd_dev *kfd, struct kfd_mem_obj *mem_obj);
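
/*
 * Allocation sketch (illustrative): carve a chunk out of the device's GTT
 * sub-allocator and release it again:
 *
 *	struct kfd_mem_obj *mem_obj;
 *
 *	if (kfd_gtt_sa_allocate(kfd, size, &mem_obj))
 *		return -ENOMEM;
 *	(use mem_obj->gpu_addr and mem_obj->cpu_ptr)
 *	kfd_gtt_sa_free(kfd, mem_obj);
 */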

extern struct device *kfd_device;

/* Topology */
int kfd_topology_init(void);
void kfd_topology_shutdown(void);
int kfd_topology_add_device(struct kfd_dev *gpu);
int kfd_topology_remove_device(struct kfd_dev *gpu);
struct kfd_topology_device *kfd_topology_device_by_proximity_domain(
						uint32_t proximity_domain);
struct kfd_dev *kfd_device_by_id(uint32_t gpu_id);
struct kfd_dev *kfd_device_by_pci_dev(const struct pci_dev *pdev);
int kfd_topology_enum_kfd_devices(uint8_t idx, struct kfd_dev **kdev);
int kfd_numa_node_to_apic_id(int numa_node_id);

/* Interrupts */
int kfd_interrupt_init(struct kfd_dev *dev);
void kfd_interrupt_exit(struct kfd_dev *dev);
void kgd2kfd_interrupt(struct kfd_dev *kfd, const void *ih_ring_entry);
bool enqueue_ih_ring_entry(struct kfd_dev *kfd, const void *ih_ring_entry);
bool interrupt_is_wanted(struct kfd_dev *dev, const uint32_t *ih_ring_entry);

/* Power Management */
void kgd2kfd_suspend(struct kfd_dev *kfd);
int kgd2kfd_resume(struct kfd_dev *kfd);

/* amdkfd Apertures */
int kfd_init_apertures(struct kfd_process *process);

/* Queue Context Management */
int init_queue(struct queue **q, const struct queue_properties *properties);
void uninit_queue(struct queue *q);
void print_queue_properties(struct queue_properties *q);
void print_queue(struct queue *q);

struct mqd_manager *mqd_manager_init(enum KFD_MQD_TYPE type,
					struct kfd_dev *dev);
struct mqd_manager *mqd_manager_init_cik(enum KFD_MQD_TYPE type,
		struct kfd_dev *dev);
struct mqd_manager *mqd_manager_init_cik_hawaii(enum KFD_MQD_TYPE type,
		struct kfd_dev *dev);
struct mqd_manager *mqd_manager_init_vi(enum KFD_MQD_TYPE type,
		struct kfd_dev *dev);
struct mqd_manager *mqd_manager_init_vi_tonga(enum KFD_MQD_TYPE type,
		struct kfd_dev *dev);
struct device_queue_manager *device_queue_manager_init(struct kfd_dev *dev);
void device_queue_manager_uninit(struct device_queue_manager *dqm);
struct kernel_queue *kernel_queue_init(struct kfd_dev *dev,
					enum kfd_queue_type type);
void kernel_queue_uninit(struct kernel_queue *kq);

/* Process Queue Manager */
struct process_queue_node {
	struct queue *q;
	struct kernel_queue *kq;
	struct list_head process_queue_list;
};

void kfd_process_dequeue_from_device(struct kfd_process_device *pdd);
void kfd_process_dequeue_from_all_devices(struct kfd_process *p);
int pqm_init(struct process_queue_manager *pqm, struct kfd_process *p);
void pqm_uninit(struct process_queue_manager *pqm);
int pqm_create_queue(struct process_queue_manager *pqm,
			    struct kfd_dev *dev,
			    struct file *f,
			    struct queue_properties *properties,
			    unsigned int *qid);
int pqm_destroy_queue(struct process_queue_manager *pqm, unsigned int qid);
int pqm_update_queue(struct process_queue_manager *pqm, unsigned int qid,
			struct queue_properties *p);
struct kernel_queue *pqm_get_kernel_queue(struct process_queue_manager *pqm,
						unsigned int qid);

int amdkfd_fence_wait_timeout(unsigned int *fence_addr,
				unsigned int fence_value,
				unsigned int timeout_ms);

/* Packet Manager */

#define KFD_FENCE_COMPLETED (100)
#define KFD_FENCE_INIT   (10)
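
/*
 * The firmware writes KFD_FENCE_COMPLETED to a fence address to signal that
 * a query-status packet was processed; a caller then polls roughly like
 * this (sketch, illustrative):
 *
 *	pm_send_query_status(pm, fence_gpu_addr, KFD_FENCE_COMPLETED);
 *	amdkfd_fence_wait_timeout(fence_cpu_addr, KFD_FENCE_COMPLETED,
 *				  timeout_ms);
 */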

struct packet_manager {
	struct device_queue_manager *dqm;
	struct kernel_queue *priv_queue;
	struct mutex lock;
	bool allocated;
	struct kfd_mem_obj *ib_buffer_obj;
	unsigned int ib_size_bytes;
};

int pm_init(struct packet_manager *pm, struct device_queue_manager *dqm);
void pm_uninit(struct packet_manager *pm);
int pm_send_set_resources(struct packet_manager *pm,
				struct scheduling_resources *res);
int pm_send_runlist(struct packet_manager *pm, struct list_head *dqm_queues);
int pm_send_query_status(struct packet_manager *pm, uint64_t fence_address,
				uint32_t fence_value);

int pm_send_unmap_queue(struct packet_manager *pm, enum kfd_queue_type type,
			enum kfd_unmap_queues_filter mode,
			uint32_t filter_param, bool reset,
			unsigned int sdma_engine);

void pm_release_ib(struct packet_manager *pm);

uint32_t pm_create_release_mem(uint64_t gpu_addr, uint32_t *buffer);

uint64_t kfd_get_number_elems(struct kfd_dev *kfd);

/* Events */
extern const struct kfd_event_interrupt_class event_interrupt_class_cik;
extern const struct kfd_device_global_init_class device_global_init_class_cik;

void kfd_event_init_process(struct kfd_process *p);
void kfd_event_free_process(struct kfd_process *p);
int kfd_event_mmap(struct kfd_process *process, struct vm_area_struct *vma);
int kfd_wait_on_events(struct kfd_process *p,
		       uint32_t num_events, void __user *data,
		       bool all, uint32_t user_timeout_ms,
		       uint32_t *wait_result);
void kfd_signal_event_interrupt(unsigned int pasid, uint32_t partial_id,
				uint32_t valid_id_bits);
void kfd_signal_iommu_event(struct kfd_dev *dev,
		unsigned int pasid, unsigned long address,
		bool is_write_requested, bool is_execute_requested);
void kfd_signal_hw_exception_event(unsigned int pasid);
int kfd_set_event(struct kfd_process *p, uint32_t event_id);
int kfd_reset_event(struct kfd_process *p, uint32_t event_id);
int kfd_event_page_set(struct kfd_process *p, void *kernel_address,
		       uint64_t size);
int kfd_event_create(struct file *devkfd, struct kfd_process *p,
		     uint32_t event_type, bool auto_reset, uint32_t node_id,
		     uint32_t *event_id, uint32_t *event_trigger_data,
		     uint64_t *event_page_offset, uint32_t *event_slot_index);
int kfd_event_destroy(struct kfd_process *p, uint32_t event_id);

void kfd_flush_tlb(struct kfd_process_device *pdd);

int dbgdev_wave_reset_wavefronts(struct kfd_dev *dev, struct kfd_process *p);

/* Debugfs */
#if defined(CONFIG_DEBUG_FS)

void kfd_debugfs_init(void);
void kfd_debugfs_fini(void);
int kfd_debugfs_mqds_by_process(struct seq_file *m, void *data);
int pqm_debugfs_mqds(struct seq_file *m, void *data);
int kfd_debugfs_hqds_by_device(struct seq_file *m, void *data);
int dqm_debugfs_hqds(struct seq_file *m, void *data);
int kfd_debugfs_rls_by_device(struct seq_file *m, void *data);
int pm_debugfs_runlist(struct seq_file *m, void *data);

#else

static inline void kfd_debugfs_init(void) {}
static inline void kfd_debugfs_fini(void) {}

#endif

#endif