1e28740ecSOded Gabbay /* 2e28740ecSOded Gabbay * Copyright 2014 Advanced Micro Devices, Inc. 3e28740ecSOded Gabbay * 4e28740ecSOded Gabbay * Permission is hereby granted, free of charge, to any person obtaining a 5e28740ecSOded Gabbay * copy of this software and associated documentation files (the "Software"), 6e28740ecSOded Gabbay * to deal in the Software without restriction, including without limitation 7e28740ecSOded Gabbay * the rights to use, copy, modify, merge, publish, distribute, sublicense, 8e28740ecSOded Gabbay * and/or sell copies of the Software, and to permit persons to whom the 9e28740ecSOded Gabbay * Software is furnished to do so, subject to the following conditions: 10e28740ecSOded Gabbay * 11e28740ecSOded Gabbay * The above copyright notice and this permission notice shall be included in 12e28740ecSOded Gabbay * all copies or substantial portions of the Software. 13e28740ecSOded Gabbay * 14e28740ecSOded Gabbay * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 15e28740ecSOded Gabbay * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 16e28740ecSOded Gabbay * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 17e28740ecSOded Gabbay * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR 18e28740ecSOded Gabbay * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, 19e28740ecSOded Gabbay * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR 20e28740ecSOded Gabbay * OTHER DEALINGS IN THE SOFTWARE. 21e28740ecSOded Gabbay */ 22e28740ecSOded Gabbay 23e28740ecSOded Gabbay /* 24e28740ecSOded Gabbay * This file defines the private interface between the 25e28740ecSOded Gabbay * AMD kernel graphics drivers and the AMD KFD. 26e28740ecSOded Gabbay */ 27e28740ecSOded Gabbay 28e28740ecSOded Gabbay #ifndef KGD_KFD_INTERFACE_H_INCLUDED 29e28740ecSOded Gabbay #define KGD_KFD_INTERFACE_H_INCLUDED 30e28740ecSOded Gabbay 31e28740ecSOded Gabbay #include <linux/types.h> 32d0b63bb3SAndres Rodriguez #include <linux/bitmap.h> 33d8d019ccSFelix Kuehling #include <linux/dma-fence.h> 34*97e3c6a8SMukul Joshi #include "amdgpu_irq.h" 35*97e3c6a8SMukul Joshi #include "amdgpu_gfx.h" 36e28740ecSOded Gabbay 37e28740ecSOded Gabbay struct pci_dev; 38c6c57446SGraham Sider struct amdgpu_device; 39e28740ecSOded Gabbay 40e28740ecSOded Gabbay struct kfd_dev; 41e28740ecSOded Gabbay struct kgd_mem; 42e28740ecSOded Gabbay 4370539bd7SFelix Kuehling enum kfd_preempt_type { 4470539bd7SFelix Kuehling KFD_PREEMPT_TYPE_WAVEFRONT_DRAIN = 0, 4570539bd7SFelix Kuehling KFD_PREEMPT_TYPE_WAVEFRONT_RESET, 46b53ef0dfSMukul Joshi KFD_PREEMPT_TYPE_WAVEFRONT_SAVE 4770539bd7SFelix Kuehling }; 4870539bd7SFelix Kuehling 49b97dfa27Sshaoyunl struct kfd_vm_fault_info { 50b97dfa27Sshaoyunl uint64_t page_addr; 51b97dfa27Sshaoyunl uint32_t vmid; 52b97dfa27Sshaoyunl uint32_t mc_id; 53b97dfa27Sshaoyunl uint32_t status; 54b97dfa27Sshaoyunl bool prot_valid; 55b97dfa27Sshaoyunl bool prot_read; 56b97dfa27Sshaoyunl bool prot_write; 57b97dfa27Sshaoyunl bool prot_exec; 58b97dfa27Sshaoyunl }; 59b97dfa27Sshaoyunl 608cce58feSFlora Cui struct kfd_cu_info { 618cce58feSFlora Cui uint32_t num_shader_engines; 628cce58feSFlora Cui uint32_t num_shader_arrays_per_engine; 638cce58feSFlora Cui uint32_t num_cu_per_sh; 648cce58feSFlora Cui uint32_t cu_active_number; 658cce58feSFlora Cui uint32_t cu_ao_mask; 668cce58feSFlora Cui uint32_t simd_per_cu; 678cce58feSFlora Cui uint32_t max_waves_per_simd; 688cce58feSFlora Cui uint32_t wave_front_size; 698cce58feSFlora Cui uint32_t max_scratch_slots_per_cu; 708cce58feSFlora Cui uint32_t lds_size; 71*97e3c6a8SMukul Joshi uint32_t cu_bitmap[AMDGPU_MAX_GC_INSTANCES][4][4]; 728cce58feSFlora Cui }; 738cce58feSFlora Cui 744073ed78SHarish Kasiviswanathan /* For getting GPU local memory information from KGD */ 754073ed78SHarish Kasiviswanathan struct kfd_local_mem_info { 764073ed78SHarish Kasiviswanathan uint64_t local_mem_size_private; 774073ed78SHarish Kasiviswanathan uint64_t local_mem_size_public; 784073ed78SHarish Kasiviswanathan uint32_t vram_width; 794073ed78SHarish Kasiviswanathan uint32_t mem_clk_max; 804073ed78SHarish Kasiviswanathan }; 814073ed78SHarish Kasiviswanathan 82e28740ecSOded Gabbay enum kgd_memory_pool { 83e28740ecSOded Gabbay KGD_POOL_SYSTEM_CACHEABLE = 1, 84e28740ecSOded Gabbay KGD_POOL_SYSTEM_WRITECOMBINE = 2, 85e28740ecSOded Gabbay KGD_POOL_FRAMEBUFFER = 3, 86e28740ecSOded Gabbay }; 87e28740ecSOded Gabbay 882690262eSAmber Lin /** 892690262eSAmber Lin * enum kfd_sched_policy 902690262eSAmber Lin * 912690262eSAmber Lin * @KFD_SCHED_POLICY_HWS: H/W scheduling policy known as command processor (cp) 922690262eSAmber Lin * scheduling. In this scheduling mode we're using the firmware code to 932690262eSAmber Lin * schedule the user mode queues and kernel queues such as HIQ and DIQ. 942690262eSAmber Lin * the HIQ queue is used as a special queue that dispatches the configuration 952690262eSAmber Lin * to the cp and the user mode queues list that are currently running. 962690262eSAmber Lin * the DIQ queue is a debugging queue that dispatches debugging commands to the 972690262eSAmber Lin * firmware. 982690262eSAmber Lin * in this scheduling mode user mode queues over subscription feature is 992690262eSAmber Lin * enabled. 1002690262eSAmber Lin * 1012690262eSAmber Lin * @KFD_SCHED_POLICY_HWS_NO_OVERSUBSCRIPTION: The same as above but the over 1022690262eSAmber Lin * subscription feature disabled. 1032690262eSAmber Lin * 1042690262eSAmber Lin * @KFD_SCHED_POLICY_NO_HWS: no H/W scheduling policy is a mode which directly 1052690262eSAmber Lin * set the command processor registers and sets the queues "manually". This 1062690262eSAmber Lin * mode is used *ONLY* for debugging proposes. 1072690262eSAmber Lin * 1082690262eSAmber Lin */ 1092690262eSAmber Lin enum kfd_sched_policy { 1102690262eSAmber Lin KFD_SCHED_POLICY_HWS = 0, 1112690262eSAmber Lin KFD_SCHED_POLICY_HWS_NO_OVERSUBSCRIPTION, 1122690262eSAmber Lin KFD_SCHED_POLICY_NO_HWS 1132690262eSAmber Lin }; 1142690262eSAmber Lin 115e28740ecSOded Gabbay struct kgd2kfd_shared_resources { 116e28740ecSOded Gabbay /* Bit n == 1 means VMID n is available for KFD. */ 117e28740ecSOded Gabbay unsigned int compute_vmid_bitmap; 118e28740ecSOded Gabbay 119d0b63bb3SAndres Rodriguez /* number of pipes per mec */ 120d0b63bb3SAndres Rodriguez uint32_t num_pipe_per_mec; 121d0b63bb3SAndres Rodriguez 122d0b63bb3SAndres Rodriguez /* number of queues per pipe */ 123d0b63bb3SAndres Rodriguez uint32_t num_queue_per_pipe; 124d0b63bb3SAndres Rodriguez 125d0b63bb3SAndres Rodriguez /* Bit n == 1 means Queue n is available for KFD */ 126e6945304SYong Zhao DECLARE_BITMAP(cp_queue_bitmap, KGD_MAX_QUEUES); 127e28740ecSOded Gabbay 128234441ddSYong Zhao /* SDMA doorbell assignments (SOC15 and later chips only). Only 129642a0e80SFelix Kuehling * specific doorbells are routed to each SDMA engine. Others 130642a0e80SFelix Kuehling * are routed to IH and VCN. They are not usable by the CP. 131642a0e80SFelix Kuehling */ 132234441ddSYong Zhao uint32_t *sdma_doorbell_idx; 1331f86805aSYong Zhao 1341f86805aSYong Zhao /* From SOC15 onward, the doorbell index range not usable for CP 1351f86805aSYong Zhao * queues. 1361f86805aSYong Zhao */ 1371f86805aSYong Zhao uint32_t non_cp_doorbells_start; 1381f86805aSYong Zhao uint32_t non_cp_doorbells_end; 139642a0e80SFelix Kuehling 140e28740ecSOded Gabbay /* Base address of doorbell aperture. */ 141e28740ecSOded Gabbay phys_addr_t doorbell_physical_address; 142e28740ecSOded Gabbay 143e28740ecSOded Gabbay /* Size in bytes of doorbell aperture. */ 144e28740ecSOded Gabbay size_t doorbell_aperture_size; 145e28740ecSOded Gabbay 146e28740ecSOded Gabbay /* Number of bytes at start of aperture reserved for KGD. */ 147e28740ecSOded Gabbay size_t doorbell_start_offset; 148155494dbSFelix Kuehling 149155494dbSFelix Kuehling /* GPUVM address space size in bytes */ 150155494dbSFelix Kuehling uint64_t gpuvm_size; 151155494dbSFelix Kuehling 152155494dbSFelix Kuehling /* Minor device number of the render node */ 153155494dbSFelix Kuehling int drm_render_minor; 1540c663695SDivya Shikre 155cc009e61SMukul Joshi bool enable_mes; 156e28740ecSOded Gabbay }; 157e28740ecSOded Gabbay 158fb31a0c9SYong Zhao struct tile_config { 159fb31a0c9SYong Zhao uint32_t *tile_config_ptr; 160fb31a0c9SYong Zhao uint32_t *macro_tile_config_ptr; 161fb31a0c9SYong Zhao uint32_t num_tile_configs; 162fb31a0c9SYong Zhao uint32_t num_macro_tile_configs; 163fb31a0c9SYong Zhao 164fb31a0c9SYong Zhao uint32_t gb_addr_config; 165fb31a0c9SYong Zhao uint32_t num_banks; 166fb31a0c9SYong Zhao uint32_t num_ranks; 167fb31a0c9SYong Zhao }; 168fb31a0c9SYong Zhao 1692690262eSAmber Lin #define KFD_MAX_NUM_OF_QUEUES_PER_DEVICE_DEFAULT 4096 170a46a2cd1SFelix Kuehling 171e28740ecSOded Gabbay /** 172e28740ecSOded Gabbay * struct kfd2kgd_calls 173e28740ecSOded Gabbay * 174e28740ecSOded Gabbay * @program_sh_mem_settings: A function that should initiate the memory 175e28740ecSOded Gabbay * properties such as main aperture memory type (cache / non cached) and 176e28740ecSOded Gabbay * secondary aperture base address, size and memory type. 177e28740ecSOded Gabbay * This function is used only for no cp scheduling mode. 178e28740ecSOded Gabbay * 179e28740ecSOded Gabbay * @set_pasid_vmid_mapping: Exposes pasid/vmid pair to the H/W for no cp 180e28740ecSOded Gabbay * scheduling mode. Only used for no cp scheduling mode. 181e28740ecSOded Gabbay * 182e28740ecSOded Gabbay * @hqd_load: Loads the mqd structure to a H/W hqd slot. used only for no cp 183e28740ecSOded Gabbay * sceduling mode. 184e28740ecSOded Gabbay * 18585ea7d07SBen Goz * @hqd_sdma_load: Loads the SDMA mqd structure to a H/W SDMA hqd slot. 18685ea7d07SBen Goz * used only for no HWS mode. 18785ea7d07SBen Goz * 18880c195f5SFelix Kuehling * @hqd_dump: Dumps CPC HQD registers to an array of address-value pairs. 18980c195f5SFelix Kuehling * Array is allocated with kmalloc, needs to be freed with kfree by caller. 19080c195f5SFelix Kuehling * 19180c195f5SFelix Kuehling * @hqd_sdma_dump: Dumps SDMA HQD registers to an array of address-value pairs. 19280c195f5SFelix Kuehling * Array is allocated with kmalloc, needs to be freed with kfree by caller. 19380c195f5SFelix Kuehling * 194e28740ecSOded Gabbay * @hqd_is_occupies: Checks if a hqd slot is occupied. 195e28740ecSOded Gabbay * 196e28740ecSOded Gabbay * @hqd_destroy: Destructs and preempts the queue assigned to that hqd slot. 197e28740ecSOded Gabbay * 19885ea7d07SBen Goz * @hqd_sdma_is_occupied: Checks if an SDMA hqd slot is occupied. 19985ea7d07SBen Goz * 20085ea7d07SBen Goz * @hqd_sdma_destroy: Destructs and preempts the SDMA queue assigned to that 20185ea7d07SBen Goz * SDMA hqd slot. 20285ea7d07SBen Goz * 20309e56abbSMoses Reuben * @set_scratch_backing_va: Sets VA for scratch backing memory of a VMID. 20409e56abbSMoses Reuben * Only used for no cp scheduling mode 20509e56abbSMoses Reuben * 206a46a2cd1SFelix Kuehling * @set_vm_context_page_table_base: Program page table base for a VMID 207a46a2cd1SFelix Kuehling * 208a46a2cd1SFelix Kuehling * @invalidate_tlbs: Invalidate TLBs for a specific PASID 209a46a2cd1SFelix Kuehling * 210a46a2cd1SFelix Kuehling * @invalidate_tlbs_vmid: Invalidate TLBs for a specific VMID 211a46a2cd1SFelix Kuehling * 21258e69886SLan Xiao * @read_vmid_from_vmfault_reg: On Hawaii the VMID is not set in the 21358e69886SLan Xiao * IH ring entry. This function allows the KFD ISR to get the VMID 21458e69886SLan Xiao * from the fault status register as early as possible. 21558e69886SLan Xiao * 21643a4bc82SRamesh Errabolu * @get_cu_occupancy: Function pointer that returns to caller the number 21743a4bc82SRamesh Errabolu * of wave fronts that are in flight for all of the queues of a process 21843a4bc82SRamesh Errabolu * as identified by its pasid. It is important to note that the value 21943a4bc82SRamesh Errabolu * returned by this function is a snapshot of current moment and cannot 22043a4bc82SRamesh Errabolu * guarantee any minimum for the number of waves in-flight. This function 22143a4bc82SRamesh Errabolu * is defined for devices that belong to GFX9 and later GFX families. Care 22243a4bc82SRamesh Errabolu * must be taken in calling this function as it is not defined for devices 22343a4bc82SRamesh Errabolu * that belong to GFX8 and below GFX families. 2240c663695SDivya Shikre * 225e28740ecSOded Gabbay * This structure contains function pointers to services that the kgd driver 226e28740ecSOded Gabbay * provides to amdkfd driver. 227e28740ecSOded Gabbay * 228e28740ecSOded Gabbay */ 229e28740ecSOded Gabbay struct kfd2kgd_calls { 230e28740ecSOded Gabbay /* Register access functions */ 2313356c38dSGraham Sider void (*program_sh_mem_settings)(struct amdgpu_device *adev, uint32_t vmid, 232e28740ecSOded Gabbay uint32_t sh_mem_config, uint32_t sh_mem_ape1_base, 233e2069a7bSMukul Joshi uint32_t sh_mem_ape1_limit, uint32_t sh_mem_bases, 234e2069a7bSMukul Joshi uint32_t inst); 235e28740ecSOded Gabbay 2363356c38dSGraham Sider int (*set_pasid_vmid_mapping)(struct amdgpu_device *adev, u32 pasid, 237e2069a7bSMukul Joshi unsigned int vmid, uint32_t inst); 238e28740ecSOded Gabbay 239e2069a7bSMukul Joshi int (*init_interrupts)(struct amdgpu_device *adev, uint32_t pipe_id, 240e2069a7bSMukul Joshi uint32_t inst); 241d36b94fcSOded Gabbay 242420185fdSGraham Sider int (*hqd_load)(struct amdgpu_device *adev, void *mqd, uint32_t pipe_id, 24370539bd7SFelix Kuehling uint32_t queue_id, uint32_t __user *wptr, 24470539bd7SFelix Kuehling uint32_t wptr_shift, uint32_t wptr_mask, 245e2069a7bSMukul Joshi struct mm_struct *mm, uint32_t inst); 246e28740ecSOded Gabbay 247420185fdSGraham Sider int (*hiq_mqd_load)(struct amdgpu_device *adev, void *mqd, 24835cd89d5SAaron Liu uint32_t pipe_id, uint32_t queue_id, 249e2069a7bSMukul Joshi uint32_t doorbell_off, uint32_t inst); 25035cd89d5SAaron Liu 251420185fdSGraham Sider int (*hqd_sdma_load)(struct amdgpu_device *adev, void *mqd, 2527ce66118SFelix Kuehling uint32_t __user *wptr, struct mm_struct *mm); 25385ea7d07SBen Goz 254420185fdSGraham Sider int (*hqd_dump)(struct amdgpu_device *adev, 25580c195f5SFelix Kuehling uint32_t pipe_id, uint32_t queue_id, 256e2069a7bSMukul Joshi uint32_t (**dump)[2], uint32_t *n_regs, uint32_t inst); 25780c195f5SFelix Kuehling 258420185fdSGraham Sider int (*hqd_sdma_dump)(struct amdgpu_device *adev, 25980c195f5SFelix Kuehling uint32_t engine_id, uint32_t queue_id, 26080c195f5SFelix Kuehling uint32_t (**dump)[2], uint32_t *n_regs); 26180c195f5SFelix Kuehling 262420185fdSGraham Sider bool (*hqd_is_occupied)(struct amdgpu_device *adev, 263420185fdSGraham Sider uint64_t queue_address, uint32_t pipe_id, 264e2069a7bSMukul Joshi uint32_t queue_id, uint32_t inst); 26585ea7d07SBen Goz 266420185fdSGraham Sider int (*hqd_destroy)(struct amdgpu_device *adev, void *mqd, 267e688ba3eSNathan Chancellor enum kfd_preempt_type reset_type, 268e688ba3eSNathan Chancellor unsigned int timeout, uint32_t pipe_id, 269e2069a7bSMukul Joshi uint32_t queue_id, uint32_t inst); 27085ea7d07SBen Goz 271420185fdSGraham Sider bool (*hqd_sdma_is_occupied)(struct amdgpu_device *adev, void *mqd); 272420185fdSGraham Sider 273420185fdSGraham Sider int (*hqd_sdma_destroy)(struct amdgpu_device *adev, void *mqd, 27485ea7d07SBen Goz unsigned int timeout); 27585ea7d07SBen Goz 2763356c38dSGraham Sider int (*wave_control_execute)(struct amdgpu_device *adev, 277a6186f4dSYair Shachar uint32_t gfx_index_val, 278e2069a7bSMukul Joshi uint32_t sq_cmd, uint32_t inst); 2793356c38dSGraham Sider bool (*get_atc_vmid_pasid_mapping_info)(struct amdgpu_device *adev, 28056fc40abSYong Zhao uint8_t vmid, 28156fc40abSYong Zhao uint16_t *p_pasid); 282a6186f4dSYair Shachar 283c637b36aSYong Zhao /* No longer needed from GFXv9 onward. The scratch base address is 284c637b36aSYong Zhao * passed to the shader by the CP. It's the user mode driver's 285c637b36aSYong Zhao * responsibility. 286c637b36aSYong Zhao */ 2873356c38dSGraham Sider void (*set_scratch_backing_va)(struct amdgpu_device *adev, 28809e56abbSMoses Reuben uint64_t va, uint32_t vmid); 289c637b36aSYong Zhao 2903356c38dSGraham Sider void (*set_vm_context_page_table_base)(struct amdgpu_device *adev, 291e715c6d0SShaoyun Liu uint32_t vmid, uint64_t page_table_base); 2923356c38dSGraham Sider uint32_t (*read_vmid_from_vmfault_reg)(struct amdgpu_device *adev); 2936ef22c39SShaoyun Liu 29408ca7122SJonathan Kim uint32_t (*enable_debug_trap)(struct amdgpu_device *adev, 29508ca7122SJonathan Kim bool restore_dbg_registers, 29608ca7122SJonathan Kim uint32_t vmid); 29708ca7122SJonathan Kim uint32_t (*disable_debug_trap)(struct amdgpu_device *adev, 29808ca7122SJonathan Kim bool keep_trap_enabled, 29908ca7122SJonathan Kim uint32_t vmid); 30008ca7122SJonathan Kim int (*validate_trap_override_request)(struct amdgpu_device *adev, 30108ca7122SJonathan Kim uint32_t trap_override, 30208ca7122SJonathan Kim uint32_t *trap_mask_supported); 30308ca7122SJonathan Kim uint32_t (*set_wave_launch_trap_override)(struct amdgpu_device *adev, 30408ca7122SJonathan Kim uint32_t vmid, 30508ca7122SJonathan Kim uint32_t trap_override, 30608ca7122SJonathan Kim uint32_t trap_mask_bits, 30708ca7122SJonathan Kim uint32_t trap_mask_request, 30808ca7122SJonathan Kim uint32_t *trap_mask_prev, 30908ca7122SJonathan Kim uint32_t kfd_dbg_trap_cntl_prev); 31008ca7122SJonathan Kim uint32_t (*set_wave_launch_mode)(struct amdgpu_device *adev, 31108ca7122SJonathan Kim uint8_t wave_launch_mode, 31208ca7122SJonathan Kim uint32_t vmid); 31308ca7122SJonathan Kim uint32_t (*set_address_watch)(struct amdgpu_device *adev, 31408ca7122SJonathan Kim uint64_t watch_address, 31508ca7122SJonathan Kim uint32_t watch_address_mask, 31608ca7122SJonathan Kim uint32_t watch_id, 31708ca7122SJonathan Kim uint32_t watch_mode, 318036e348fSEric Huang uint32_t debug_vmid, 319036e348fSEric Huang uint32_t inst); 32008ca7122SJonathan Kim uint32_t (*clear_address_watch)(struct amdgpu_device *adev, 32108ca7122SJonathan Kim uint32_t watch_id); 32208ca7122SJonathan Kim void (*get_iq_wait_times)(struct amdgpu_device *adev, 323036e348fSEric Huang uint32_t *wait_times, 324036e348fSEric Huang uint32_t inst); 32508ca7122SJonathan Kim void (*build_grace_period_packet_info)(struct amdgpu_device *adev, 32608ca7122SJonathan Kim uint32_t wait_times, 32708ca7122SJonathan Kim uint32_t grace_period, 32808ca7122SJonathan Kim uint32_t *reg_offset, 32981faf9e0SMukul Joshi uint32_t *reg_data); 3303356c38dSGraham Sider void (*get_cu_occupancy)(struct amdgpu_device *adev, int pasid, 331e2069a7bSMukul Joshi int *wave_cnt, int *max_waves_per_cu, uint32_t inst); 3323356c38dSGraham Sider void (*program_trap_handler_settings)(struct amdgpu_device *adev, 333e2069a7bSMukul Joshi uint32_t vmid, uint64_t tba_addr, uint64_t tma_addr, 334e2069a7bSMukul Joshi uint32_t inst); 335e28740ecSOded Gabbay }; 336e28740ecSOded Gabbay 337e28740ecSOded Gabbay #endif /* KGD_KFD_INTERFACE_H_INCLUDED */ 338