1 // SPDX-License-Identifier: GPL-2.0 OR MIT
2 /*
3  * Copyright 2014-2022 Advanced Micro Devices, Inc.
4  *
5  * Permission is hereby granted, free of charge, to any person obtaining a
6  * copy of this software and associated documentation files (the "Software"),
7  * to deal in the Software without restriction, including without limitation
8  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
9  * and/or sell copies of the Software, and to permit persons to whom the
10  * Software is furnished to do so, subject to the following conditions:
11  *
12  * The above copyright notice and this permission notice shall be included in
13  * all copies or substantial portions of the Software.
14  *
15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18  * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
19  * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
20  * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
21  * OTHER DEALINGS IN THE SOFTWARE.
22  */
23 #include "kfd_priv.h"
24 #include <linux/mm.h>
25 #include <linux/mman.h>
26 #include <linux/slab.h>
27 #include <linux/io.h>
28 #include <linux/idr.h>
29 
/*
 * This extension provides kernel-level doorbell management for the
 * kernel queues, using the first doorbell page reserved for the kernel.
 */
34 
35 /*
36  * Each device exposes a doorbell aperture, a PCI MMIO aperture that
37  * receives 32-bit writes that are passed to queues as wptr values.
38  * The doorbells are intended to be written by applications as part
39  * of queueing work on user-mode queues.
40  * We assign doorbells to applications in PAGE_SIZE-sized and aligned chunks.
41  * We map the doorbell address space into user-mode when a process creates
42  * its first queue on each device.
43  * Although the mapping is done by KFD, it is equivalent to an mmap of
44  * the /dev/kfd with the particular device encoded in the mmap offset.
45  * There will be other uses for mmap of /dev/kfd, so only a range of
46  * offsets (KFD_MMAP_DOORBELL_START-END) is used for doorbells.
47  */
48 
49 /* # of doorbell bytes allocated for each process. */
50 size_t kfd_doorbell_process_slice(struct kfd_dev *kfd)
51 {
52 	if (!kfd->shared_resources.enable_mes)
53 		return roundup(kfd->device_info.doorbell_size *
54 				KFD_MAX_NUM_OF_QUEUES_PER_PROCESS,
55 				PAGE_SIZE);
56 	else
57 		return amdgpu_mes_doorbell_process_slice(
58 					(struct amdgpu_device *)kfd->adev);
59 }
60 
61 /* Doorbell calculations for device init. */
62 int kfd_doorbell_init(struct kfd_dev *kfd)
63 {
64 	int size = PAGE_SIZE;
65 	int r;
66 
67 	/*
68 	 * Todo: KFD kernel level operations need only one doorbell for
69 	 * ring test/HWS. So instead of reserving a whole page here for
70 	 * kernel, reserve and consume a doorbell from existing KGD kernel
71 	 * doorbell page.
72 	 */
73 
74 	/* Bitmap to dynamically allocate doorbells from kernel page */
75 	kfd->doorbell_bitmap = bitmap_zalloc(size / sizeof(u32), GFP_KERNEL);
76 	if (!kfd->doorbell_bitmap) {
77 		DRM_ERROR("Failed to allocate kernel doorbell bitmap\n");
78 		return -ENOMEM;
79 	}
80 
81 	/* Alloc a doorbell page for KFD kernel usages */
82 	r = amdgpu_bo_create_kernel(kfd->adev,
83 				    size,
84 				    PAGE_SIZE,
85 				    AMDGPU_GEM_DOMAIN_DOORBELL,
86 				    &kfd->doorbells,
87 				    NULL,
88 				    (void **)&kfd->doorbell_kernel_ptr);
89 	if (r) {
90 		pr_err("failed to allocate kernel doorbells\n");
91 		bitmap_free(kfd->doorbell_bitmap);
92 		return r;
93 	}
94 
95 	pr_debug("Doorbell kernel address == %p\n", kfd->doorbell_kernel_ptr);
96 	return 0;
97 }
98 
99 void kfd_doorbell_fini(struct kfd_dev *kfd)
100 {
101 	bitmap_free(kfd->doorbell_bitmap);
102 	amdgpu_bo_free_kernel(&kfd->doorbells, NULL,
103 			     (void **)&kfd->doorbell_kernel_ptr);
104 }
105 
106 int kfd_doorbell_mmap(struct kfd_node *dev, struct kfd_process *process,
107 		      struct vm_area_struct *vma)
108 {
109 	phys_addr_t address;
110 	struct kfd_process_device *pdd;
111 
112 	/*
113 	 * For simplicitly we only allow mapping of the entire doorbell
114 	 * allocation of a single device & process.
115 	 */
116 	if (vma->vm_end - vma->vm_start != kfd_doorbell_process_slice(dev->kfd))
117 		return -EINVAL;
118 
119 	pdd = kfd_get_process_device_data(dev, process);
120 	if (!pdd)
121 		return -EINVAL;
122 
123 	/* Calculate physical address of doorbell */
124 	address = kfd_get_process_doorbells(pdd);
125 	if (!address)
126 		return -ENOMEM;
127 	vm_flags_set(vma, VM_IO | VM_DONTCOPY | VM_DONTEXPAND | VM_NORESERVE |
128 				VM_DONTDUMP | VM_PFNMAP);
129 
130 	vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot);
131 
132 	pr_debug("Mapping doorbell page\n"
133 		 "     target user address == 0x%08llX\n"
134 		 "     physical address    == 0x%08llX\n"
135 		 "     vm_flags            == 0x%04lX\n"
136 		 "     size                == 0x%04lX\n",
137 		 (unsigned long long) vma->vm_start, address, vma->vm_flags,
138 		 kfd_doorbell_process_slice(dev->kfd));
139 
140 
141 	return io_remap_pfn_range(vma,
142 				vma->vm_start,
143 				address >> PAGE_SHIFT,
144 				kfd_doorbell_process_slice(dev->kfd),
145 				vma->vm_page_prot);
146 }
147 
148 
149 /* get kernel iomem pointer for a doorbell */
150 void __iomem *kfd_get_kernel_doorbell(struct kfd_dev *kfd,
151 					unsigned int *doorbell_off)
152 {
153 	u32 inx;
154 
155 	mutex_lock(&kfd->doorbell_mutex);
156 	inx = find_first_zero_bit(kfd->doorbell_bitmap, PAGE_SIZE / sizeof(u32));
157 
158 	__set_bit(inx, kfd->doorbell_bitmap);
159 	mutex_unlock(&kfd->doorbell_mutex);
160 
161 	if (inx >= KFD_MAX_NUM_OF_QUEUES_PER_PROCESS)
162 		return NULL;
163 
164 	*doorbell_off = amdgpu_doorbell_index_on_bar(kfd->adev, kfd->doorbells, inx);
165 
166 	pr_debug("Get kernel queue doorbell\n"
167 			"     doorbell offset   == 0x%08X\n"
168 			"     doorbell index    == 0x%x\n",
169 		*doorbell_off, inx);
170 
171 	return kfd->doorbell_kernel_ptr + inx;
172 }
173 
174 void kfd_release_kernel_doorbell(struct kfd_dev *kfd, u32 __iomem *db_addr)
175 {
176 	unsigned int inx;
177 
178 	inx = (unsigned int)(db_addr - kfd->doorbell_kernel_ptr);
179 
180 	mutex_lock(&kfd->doorbell_mutex);
181 	__clear_bit(inx, kfd->doorbell_bitmap);
182 	mutex_unlock(&kfd->doorbell_mutex);
183 }
184 
185 void write_kernel_doorbell(void __iomem *db, u32 value)
186 {
187 	if (db) {
188 		writel(value, db);
189 		pr_debug("Writing %d to doorbell address %p\n", value, db);
190 	}
191 }
192 
193 void write_kernel_doorbell64(void __iomem *db, u64 value)
194 {
195 	if (db) {
196 		WARN(((unsigned long)db & 7) != 0,
197 		     "Unaligned 64-bit doorbell");
198 		writeq(value, (u64 __iomem *)db);
199 		pr_debug("writing %llu to doorbell address %p\n", value, db);
200 	}
201 }
202 
203 unsigned int kfd_get_doorbell_dw_offset_in_bar(struct kfd_dev *kfd,
204 					struct kfd_process_device *pdd,
205 					unsigned int doorbell_id)
206 {
207 	/*
208 	 * doorbell_base_dw_offset accounts for doorbells taken by KGD.
209 	 * index * kfd_doorbell_process_slice/sizeof(u32) adjusts to
210 	 * the process's doorbells. The offset returned is in dword
211 	 * units regardless of the ASIC-dependent doorbell size.
212 	 */
213 	if (!kfd->shared_resources.enable_mes)
214 		return kfd->doorbell_base_dw_offset +
215 			pdd->doorbell_index
216 			* kfd_doorbell_process_slice(kfd) / sizeof(u32) +
217 			doorbell_id *
218 			kfd->device_info.doorbell_size / sizeof(u32);
219 	else
220 		return amdgpu_mes_get_doorbell_dw_offset_in_bar(
221 				(struct amdgpu_device *)kfd->adev,
222 				pdd->doorbell_index, doorbell_id);
223 }
224 
225 uint64_t kfd_get_number_elems(struct kfd_dev *kfd)
226 {
227 	uint64_t num_of_elems = (kfd->shared_resources.doorbell_aperture_size -
228 				kfd->shared_resources.doorbell_start_offset) /
229 					kfd_doorbell_process_slice(kfd) + 1;
230 
231 	return num_of_elems;
232 
233 }
234 
235 static int init_doorbell_bitmap(struct qcm_process_device *qpd,
236 				struct kfd_dev *dev)
237 {
238 	unsigned int i;
239 	int range_start = dev->shared_resources.non_cp_doorbells_start;
240 	int range_end = dev->shared_resources.non_cp_doorbells_end;
241 
242 	if (!KFD_IS_SOC15(dev))
243 		return 0;
244 
245 	/* Mask out doorbells reserved for SDMA, IH, and VCN on SOC15. */
246 	pr_debug("reserved doorbell 0x%03x - 0x%03x\n", range_start, range_end);
247 	pr_debug("reserved doorbell 0x%03x - 0x%03x\n",
248 			range_start + KFD_QUEUE_DOORBELL_MIRROR_OFFSET,
249 			range_end + KFD_QUEUE_DOORBELL_MIRROR_OFFSET);
250 
251 	for (i = 0; i < KFD_MAX_NUM_OF_QUEUES_PER_PROCESS / 2; i++) {
252 		if (i >= range_start && i <= range_end) {
253 			__set_bit(i, qpd->doorbell_bitmap);
254 			__set_bit(i + KFD_QUEUE_DOORBELL_MIRROR_OFFSET,
255 				  qpd->doorbell_bitmap);
256 		}
257 	}
258 
259 	return 0;
260 }
261 
262 phys_addr_t kfd_get_process_doorbells(struct kfd_process_device *pdd)
263 {
264 	struct amdgpu_device *adev = pdd->dev->adev;
265 	uint32_t first_db_index;
266 
267 	if (!pdd->qpd.proc_doorbells) {
268 		if (kfd_alloc_process_doorbells(pdd->dev->kfd, pdd))
269 			/* phys_addr_t 0 is error */
270 			return 0;
271 	}
272 
273 	first_db_index = amdgpu_doorbell_index_on_bar(adev, pdd->qpd.proc_doorbells, 0);
274 	return adev->doorbell.base + first_db_index * sizeof(uint32_t);
275 }
276 
277 int kfd_alloc_process_doorbells(struct kfd_dev *kfd, struct kfd_process_device *pdd)
278 {
279 	int r;
280 	struct qcm_process_device *qpd = &pdd->qpd;
281 
282 	/* Allocate bitmap for dynamic doorbell allocation */
283 	qpd->doorbell_bitmap = bitmap_zalloc(KFD_MAX_NUM_OF_QUEUES_PER_PROCESS,
284 					     GFP_KERNEL);
285 	if (!qpd->doorbell_bitmap) {
286 		DRM_ERROR("Failed to allocate process doorbell bitmap\n");
287 		return -ENOMEM;
288 	}
289 
290 	r = init_doorbell_bitmap(&pdd->qpd, kfd);
291 	if (r) {
292 		DRM_ERROR("Failed to initialize process doorbells\n");
293 		r = -ENOMEM;
294 		goto err;
295 	}
296 
297 	/* Allocate doorbells for this process */
298 	r = amdgpu_bo_create_kernel(kfd->adev,
299 				    kfd_doorbell_process_slice(kfd),
300 				    PAGE_SIZE,
301 				    AMDGPU_GEM_DOMAIN_DOORBELL,
302 				    &qpd->proc_doorbells,
303 				    NULL,
304 				    NULL);
305 	if (r) {
306 		DRM_ERROR("Failed to allocate process doorbells\n");
307 		goto err;
308 	}
309 	return 0;
310 
311 err:
312 	bitmap_free(qpd->doorbell_bitmap);
313 	qpd->doorbell_bitmap = NULL;
314 	return r;
315 }
316 
317 void kfd_free_process_doorbells(struct kfd_dev *kfd, struct kfd_process_device *pdd)
318 {
319 	struct qcm_process_device *qpd = &pdd->qpd;
320 
321 	if (qpd->doorbell_bitmap) {
322 		bitmap_free(qpd->doorbell_bitmap);
323 		qpd->doorbell_bitmap = NULL;
324 	}
325 
326 	amdgpu_bo_free_kernel(&qpd->proc_doorbells, NULL, NULL);
327 }
328