1 // SPDX-License-Identifier: GPL-2.0 OR MIT 2 /* 3 * Copyright 2014-2022 Advanced Micro Devices, Inc. 4 * 5 * Permission is hereby granted, free of charge, to any person obtaining a 6 * copy of this software and associated documentation files (the "Software"), 7 * to deal in the Software without restriction, including without limitation 8 * the rights to use, copy, modify, merge, publish, distribute, sublicense, 9 * and/or sell copies of the Software, and to permit persons to whom the 10 * Software is furnished to do so, subject to the following conditions: 11 * 12 * The above copyright notice and this permission notice shall be included in 13 * all copies or substantial portions of the Software. 14 * 15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 18 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR 19 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, 20 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR 21 * OTHER DEALINGS IN THE SOFTWARE. 22 */ 23 #include "kfd_priv.h" 24 #include <linux/mm.h> 25 #include <linux/mman.h> 26 #include <linux/slab.h> 27 #include <linux/io.h> 28 #include <linux/idr.h> 29 30 /* 31 * This extension supports a kernel level doorbells management for the 32 * kernel queues using the first doorbell page reserved for the kernel. 33 */ 34 35 /* 36 * Each device exposes a doorbell aperture, a PCI MMIO aperture that 37 * receives 32-bit writes that are passed to queues as wptr values. 38 * The doorbells are intended to be written by applications as part 39 * of queueing work on user-mode queues. 40 * We assign doorbells to applications in PAGE_SIZE-sized and aligned chunks. 41 * We map the doorbell address space into user-mode when a process creates 42 * its first queue on each device. 43 * Although the mapping is done by KFD, it is equivalent to an mmap of 44 * the /dev/kfd with the particular device encoded in the mmap offset. 45 * There will be other uses for mmap of /dev/kfd, so only a range of 46 * offsets (KFD_MMAP_DOORBELL_START-END) is used for doorbells. 47 */ 48 49 /* # of doorbell bytes allocated for each process. */ 50 size_t kfd_doorbell_process_slice(struct kfd_dev *kfd) 51 { 52 if (!kfd->shared_resources.enable_mes) 53 return roundup(kfd->device_info.doorbell_size * 54 KFD_MAX_NUM_OF_QUEUES_PER_PROCESS, 55 PAGE_SIZE); 56 else 57 return amdgpu_mes_doorbell_process_slice( 58 (struct amdgpu_device *)kfd->adev); 59 } 60 61 /* Doorbell calculations for device init. */ 62 int kfd_doorbell_init(struct kfd_dev *kfd) 63 { 64 size_t doorbell_start_offset; 65 size_t doorbell_aperture_size; 66 size_t doorbell_process_limit; 67 68 /* 69 * With MES enabled, just set the doorbell base as it is needed 70 * to calculate doorbell physical address. 71 */ 72 if (kfd->shared_resources.enable_mes) { 73 kfd->doorbell_base = 74 kfd->shared_resources.doorbell_physical_address; 75 return 0; 76 } 77 78 /* 79 * We start with calculations in bytes because the input data might 80 * only be byte-aligned. 81 * Only after we have done the rounding can we assume any alignment. 82 */ 83 84 doorbell_start_offset = 85 roundup(kfd->shared_resources.doorbell_start_offset, 86 kfd_doorbell_process_slice(kfd)); 87 88 doorbell_aperture_size = 89 rounddown(kfd->shared_resources.doorbell_aperture_size, 90 kfd_doorbell_process_slice(kfd)); 91 92 if (doorbell_aperture_size > doorbell_start_offset) 93 doorbell_process_limit = 94 (doorbell_aperture_size - doorbell_start_offset) / 95 kfd_doorbell_process_slice(kfd); 96 else 97 return -ENOSPC; 98 99 if (!kfd->max_doorbell_slices || 100 doorbell_process_limit < kfd->max_doorbell_slices) 101 kfd->max_doorbell_slices = doorbell_process_limit; 102 103 kfd->doorbell_base = kfd->shared_resources.doorbell_physical_address + 104 doorbell_start_offset; 105 106 kfd->doorbell_base_dw_offset = doorbell_start_offset / sizeof(u32); 107 108 kfd->doorbell_kernel_ptr = ioremap(kfd->doorbell_base, 109 kfd_doorbell_process_slice(kfd)); 110 111 if (!kfd->doorbell_kernel_ptr) 112 return -ENOMEM; 113 114 pr_debug("Doorbell initialization:\n"); 115 pr_debug("doorbell base == 0x%08lX\n", 116 (uintptr_t)kfd->doorbell_base); 117 118 pr_debug("doorbell_base_dw_offset == 0x%08lX\n", 119 kfd->doorbell_base_dw_offset); 120 121 pr_debug("doorbell_process_limit == 0x%08lX\n", 122 doorbell_process_limit); 123 124 pr_debug("doorbell_kernel_offset == 0x%08lX\n", 125 (uintptr_t)kfd->doorbell_base); 126 127 pr_debug("doorbell aperture size == 0x%08lX\n", 128 kfd->shared_resources.doorbell_aperture_size); 129 130 pr_debug("doorbell kernel address == %p\n", kfd->doorbell_kernel_ptr); 131 132 return 0; 133 } 134 135 void kfd_doorbell_fini(struct kfd_dev *kfd) 136 { 137 if (kfd->doorbell_kernel_ptr) 138 iounmap(kfd->doorbell_kernel_ptr); 139 } 140 141 int kfd_doorbell_mmap(struct kfd_dev *dev, struct kfd_process *process, 142 struct vm_area_struct *vma) 143 { 144 phys_addr_t address; 145 struct kfd_process_device *pdd; 146 147 /* 148 * For simplicitly we only allow mapping of the entire doorbell 149 * allocation of a single device & process. 150 */ 151 if (vma->vm_end - vma->vm_start != kfd_doorbell_process_slice(dev)) 152 return -EINVAL; 153 154 pdd = kfd_get_process_device_data(dev, process); 155 if (!pdd) 156 return -EINVAL; 157 158 /* Calculate physical address of doorbell */ 159 address = kfd_get_process_doorbells(pdd); 160 if (!address) 161 return -ENOMEM; 162 vma->vm_flags |= VM_IO | VM_DONTCOPY | VM_DONTEXPAND | VM_NORESERVE | 163 VM_DONTDUMP | VM_PFNMAP; 164 165 vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot); 166 167 pr_debug("Mapping doorbell page\n" 168 " target user address == 0x%08llX\n" 169 " physical address == 0x%08llX\n" 170 " vm_flags == 0x%04lX\n" 171 " size == 0x%04lX\n", 172 (unsigned long long) vma->vm_start, address, vma->vm_flags, 173 kfd_doorbell_process_slice(dev)); 174 175 176 return io_remap_pfn_range(vma, 177 vma->vm_start, 178 address >> PAGE_SHIFT, 179 kfd_doorbell_process_slice(dev), 180 vma->vm_page_prot); 181 } 182 183 184 /* get kernel iomem pointer for a doorbell */ 185 void __iomem *kfd_get_kernel_doorbell(struct kfd_dev *kfd, 186 unsigned int *doorbell_off) 187 { 188 u32 inx; 189 190 mutex_lock(&kfd->doorbell_mutex); 191 inx = find_first_zero_bit(kfd->doorbell_available_index, 192 KFD_MAX_NUM_OF_QUEUES_PER_PROCESS); 193 194 __set_bit(inx, kfd->doorbell_available_index); 195 mutex_unlock(&kfd->doorbell_mutex); 196 197 if (inx >= KFD_MAX_NUM_OF_QUEUES_PER_PROCESS) 198 return NULL; 199 200 inx *= kfd->device_info.doorbell_size / sizeof(u32); 201 202 /* 203 * Calculating the kernel doorbell offset using the first 204 * doorbell page. 205 */ 206 *doorbell_off = kfd->doorbell_base_dw_offset + inx; 207 208 pr_debug("Get kernel queue doorbell\n" 209 " doorbell offset == 0x%08X\n" 210 " doorbell index == 0x%x\n", 211 *doorbell_off, inx); 212 213 return kfd->doorbell_kernel_ptr + inx; 214 } 215 216 void kfd_release_kernel_doorbell(struct kfd_dev *kfd, u32 __iomem *db_addr) 217 { 218 unsigned int inx; 219 220 inx = (unsigned int)(db_addr - kfd->doorbell_kernel_ptr) 221 * sizeof(u32) / kfd->device_info.doorbell_size; 222 223 mutex_lock(&kfd->doorbell_mutex); 224 __clear_bit(inx, kfd->doorbell_available_index); 225 mutex_unlock(&kfd->doorbell_mutex); 226 } 227 228 void write_kernel_doorbell(void __iomem *db, u32 value) 229 { 230 if (db) { 231 writel(value, db); 232 pr_debug("Writing %d to doorbell address %p\n", value, db); 233 } 234 } 235 236 void write_kernel_doorbell64(void __iomem *db, u64 value) 237 { 238 if (db) { 239 WARN(((unsigned long)db & 7) != 0, 240 "Unaligned 64-bit doorbell"); 241 writeq(value, (u64 __iomem *)db); 242 pr_debug("writing %llu to doorbell address %p\n", value, db); 243 } 244 } 245 246 unsigned int kfd_get_doorbell_dw_offset_in_bar(struct kfd_dev *kfd, 247 struct kfd_process_device *pdd, 248 unsigned int doorbell_id) 249 { 250 /* 251 * doorbell_base_dw_offset accounts for doorbells taken by KGD. 252 * index * kfd_doorbell_process_slice/sizeof(u32) adjusts to 253 * the process's doorbells. The offset returned is in dword 254 * units regardless of the ASIC-dependent doorbell size. 255 */ 256 if (!kfd->shared_resources.enable_mes) 257 return kfd->doorbell_base_dw_offset + 258 pdd->doorbell_index 259 * kfd_doorbell_process_slice(kfd) / sizeof(u32) + 260 doorbell_id * 261 kfd->device_info.doorbell_size / sizeof(u32); 262 else 263 return amdgpu_mes_get_doorbell_dw_offset_in_bar( 264 (struct amdgpu_device *)kfd->adev, 265 pdd->doorbell_index, doorbell_id); 266 } 267 268 uint64_t kfd_get_number_elems(struct kfd_dev *kfd) 269 { 270 uint64_t num_of_elems = (kfd->shared_resources.doorbell_aperture_size - 271 kfd->shared_resources.doorbell_start_offset) / 272 kfd_doorbell_process_slice(kfd) + 1; 273 274 return num_of_elems; 275 276 } 277 278 phys_addr_t kfd_get_process_doorbells(struct kfd_process_device *pdd) 279 { 280 if (!pdd->doorbell_index) { 281 int r = kfd_alloc_process_doorbells(pdd->dev, 282 &pdd->doorbell_index); 283 if (r) 284 return 0; 285 } 286 287 return pdd->dev->doorbell_base + 288 pdd->doorbell_index * kfd_doorbell_process_slice(pdd->dev); 289 } 290 291 int kfd_alloc_process_doorbells(struct kfd_dev *kfd, unsigned int *doorbell_index) 292 { 293 int r = 0; 294 295 if (!kfd->shared_resources.enable_mes) 296 r = ida_simple_get(&kfd->doorbell_ida, 1, 297 kfd->max_doorbell_slices, GFP_KERNEL); 298 else 299 r = amdgpu_mes_alloc_process_doorbells( 300 (struct amdgpu_device *)kfd->adev, 301 doorbell_index); 302 303 if (r > 0) 304 *doorbell_index = r; 305 306 if (r < 0) 307 pr_err("Failed to allocate process doorbells\n"); 308 309 return r; 310 } 311 312 void kfd_free_process_doorbells(struct kfd_dev *kfd, unsigned int doorbell_index) 313 { 314 if (doorbell_index) { 315 if (!kfd->shared_resources.enable_mes) 316 ida_simple_remove(&kfd->doorbell_ida, doorbell_index); 317 else 318 amdgpu_mes_free_process_doorbells( 319 (struct amdgpu_device *)kfd->adev, 320 doorbell_index); 321 } 322 } 323