132de57e9SJack Xiao /*
232de57e9SJack Xiao * Copyright 2019 Advanced Micro Devices, Inc.
332de57e9SJack Xiao *
432de57e9SJack Xiao * Permission is hereby granted, free of charge, to any person obtaining a
532de57e9SJack Xiao * copy of this software and associated documentation files (the "Software"),
632de57e9SJack Xiao * to deal in the Software without restriction, including without limitation
732de57e9SJack Xiao * the rights to use, copy, modify, merge, publish, distribute, sublicense,
832de57e9SJack Xiao * and/or sell copies of the Software, and to permit persons to whom the
932de57e9SJack Xiao * Software is furnished to do so, subject to the following conditions:
1032de57e9SJack Xiao *
1132de57e9SJack Xiao * The above copyright notice and this permission notice shall be included in
1232de57e9SJack Xiao * all copies or substantial portions of the Software.
1332de57e9SJack Xiao *
1432de57e9SJack Xiao * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
1532de57e9SJack Xiao * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
1632de57e9SJack Xiao * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
1732de57e9SJack Xiao * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
1832de57e9SJack Xiao * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
1932de57e9SJack Xiao * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
2032de57e9SJack Xiao * OTHER DEALINGS IN THE SOFTWARE.
2132de57e9SJack Xiao *
2232de57e9SJack Xiao */
2332de57e9SJack Xiao
24cc42e76eSMario Limonciello #include <linux/firmware.h>
252acc73f8SChristian König #include <drm/drm_exec.h>
26cc42e76eSMario Limonciello
2732de57e9SJack Xiao #include "amdgpu_mes.h"
2832de57e9SJack Xiao #include "amdgpu.h"
2932de57e9SJack Xiao #include "soc15_common.h"
3032de57e9SJack Xiao #include "amdgpu_mes_ctx.h"
3132de57e9SJack Xiao
/*
 * Number of queue doorbells accounted for per process when sizing the
 * per-process doorbell slice (see amdgpu_mes_doorbell_process_slice()).
 */
#define AMDGPU_MES_MAX_NUM_OF_QUEUES_PER_PROCESS 1024
/*
 * Size of a single doorbell; PAGE_SIZE is divided by this value to obtain
 * the number of MES doorbells, so it is presumably expressed in bytes.
 */
#define AMDGPU_ONE_DOORBELL_SIZE 8
3432de57e9SJack Xiao
amdgpu_mes_doorbell_process_slice(struct amdgpu_device * adev)35464913c0SMukul Joshi int amdgpu_mes_doorbell_process_slice(struct amdgpu_device *adev)
3632de57e9SJack Xiao {
3732de57e9SJack Xiao return roundup(AMDGPU_ONE_DOORBELL_SIZE *
3832de57e9SJack Xiao AMDGPU_MES_MAX_NUM_OF_QUEUES_PER_PROCESS,
3932de57e9SJack Xiao PAGE_SIZE);
4032de57e9SJack Xiao }
4132de57e9SJack Xiao
amdgpu_mes_kernel_doorbell_get(struct amdgpu_device * adev,struct amdgpu_mes_process * process,int ip_type,uint64_t * doorbell_index)42e3cbb1f4SShashank Sharma static int amdgpu_mes_kernel_doorbell_get(struct amdgpu_device *adev,
4332de57e9SJack Xiao struct amdgpu_mes_process *process,
4432de57e9SJack Xiao int ip_type, uint64_t *doorbell_index)
4532de57e9SJack Xiao {
4632de57e9SJack Xiao unsigned int offset, found;
47e3cbb1f4SShashank Sharma struct amdgpu_mes *mes = &adev->mes;
4832de57e9SJack Xiao
49e3cbb1f4SShashank Sharma if (ip_type == AMDGPU_RING_TYPE_SDMA)
5032de57e9SJack Xiao offset = adev->doorbell_index.sdma_engine[0];
51e3cbb1f4SShashank Sharma else
52e3cbb1f4SShashank Sharma offset = 0;
5332de57e9SJack Xiao
54e3cbb1f4SShashank Sharma found = find_next_zero_bit(mes->doorbell_bitmap, mes->num_mes_dbs, offset);
55e3cbb1f4SShashank Sharma if (found >= mes->num_mes_dbs) {
5632de57e9SJack Xiao DRM_WARN("No doorbell available\n");
5732de57e9SJack Xiao return -ENOSPC;
5832de57e9SJack Xiao }
5932de57e9SJack Xiao
60e3cbb1f4SShashank Sharma set_bit(found, mes->doorbell_bitmap);
6132de57e9SJack Xiao
62e3cbb1f4SShashank Sharma /* Get the absolute doorbell index on BAR */
63e3cbb1f4SShashank Sharma *doorbell_index = mes->db_start_dw_offset + found * 2;
6432de57e9SJack Xiao return 0;
6532de57e9SJack Xiao }
6632de57e9SJack Xiao
amdgpu_mes_kernel_doorbell_free(struct amdgpu_device * adev,struct amdgpu_mes_process * process,uint32_t doorbell_index)67e3cbb1f4SShashank Sharma static void amdgpu_mes_kernel_doorbell_free(struct amdgpu_device *adev,
6832de57e9SJack Xiao struct amdgpu_mes_process *process,
6932de57e9SJack Xiao uint32_t doorbell_index)
7032de57e9SJack Xiao {
71e3cbb1f4SShashank Sharma unsigned int old, rel_index;
72e3cbb1f4SShashank Sharma struct amdgpu_mes *mes = &adev->mes;
7332de57e9SJack Xiao
74e3cbb1f4SShashank Sharma /* Find the relative index of the doorbell in this object */
75e3cbb1f4SShashank Sharma rel_index = (doorbell_index - mes->db_start_dw_offset) / 2;
76e3cbb1f4SShashank Sharma old = test_and_clear_bit(rel_index, mes->doorbell_bitmap);
7732de57e9SJack Xiao WARN_ON(!old);
7832de57e9SJack Xiao }
7932de57e9SJack Xiao
amdgpu_mes_doorbell_init(struct amdgpu_device * adev)8032de57e9SJack Xiao static int amdgpu_mes_doorbell_init(struct amdgpu_device *adev)
8132de57e9SJack Xiao {
820fe69062SLe Ma int i;
83e3cbb1f4SShashank Sharma struct amdgpu_mes *mes = &adev->mes;
8432de57e9SJack Xiao
85e3cbb1f4SShashank Sharma /* Bitmap for dynamic allocation of kernel doorbells */
86e3cbb1f4SShashank Sharma mes->doorbell_bitmap = bitmap_zalloc(PAGE_SIZE / sizeof(u32), GFP_KERNEL);
87e3cbb1f4SShashank Sharma if (!mes->doorbell_bitmap) {
88e3cbb1f4SShashank Sharma DRM_ERROR("Failed to allocate MES doorbell bitmap\n");
89e3cbb1f4SShashank Sharma return -ENOMEM;
90e3cbb1f4SShashank Sharma }
910fe69062SLe Ma
92e3cbb1f4SShashank Sharma mes->num_mes_dbs = PAGE_SIZE / AMDGPU_ONE_DOORBELL_SIZE;
93e3cbb1f4SShashank Sharma for (i = 0; i < AMDGPU_MES_PRIORITY_NUM_LEVELS; i++) {
94e3cbb1f4SShashank Sharma adev->mes.aggregated_doorbells[i] = mes->db_start_dw_offset + i * 2;
95e3cbb1f4SShashank Sharma set_bit(i, mes->doorbell_bitmap);
96e3cbb1f4SShashank Sharma }
9732de57e9SJack Xiao
9832de57e9SJack Xiao return 0;
9932de57e9SJack Xiao }
100b04c1d64SJack Xiao
amdgpu_mes_doorbell_free(struct amdgpu_device * adev)101e3cbb1f4SShashank Sharma static void amdgpu_mes_doorbell_free(struct amdgpu_device *adev)
102e3cbb1f4SShashank Sharma {
103e3cbb1f4SShashank Sharma bitmap_free(adev->mes.doorbell_bitmap);
104e3cbb1f4SShashank Sharma }
105e3cbb1f4SShashank Sharma
amdgpu_mes_init(struct amdgpu_device * adev)106b04c1d64SJack Xiao int amdgpu_mes_init(struct amdgpu_device *adev)
107b04c1d64SJack Xiao {
108b04c1d64SJack Xiao int i, r;
109b04c1d64SJack Xiao
110b04c1d64SJack Xiao adev->mes.adev = adev;
111b04c1d64SJack Xiao
112b04c1d64SJack Xiao idr_init(&adev->mes.pasid_idr);
113b04c1d64SJack Xiao idr_init(&adev->mes.gang_id_idr);
114b04c1d64SJack Xiao idr_init(&adev->mes.queue_id_idr);
115b04c1d64SJack Xiao ida_init(&adev->mes.doorbell_ida);
116b04c1d64SJack Xiao spin_lock_init(&adev->mes.queue_id_lock);
11735ba8850SJack Xiao spin_lock_init(&adev->mes.ring_lock);
11818ee4ce6SJack Xiao mutex_init(&adev->mes.mutex_hidden);
119b04c1d64SJack Xiao
120b04c1d64SJack Xiao adev->mes.total_max_queue = AMDGPU_FENCE_MES_QUEUE_ID_MASK;
121b04c1d64SJack Xiao adev->mes.vmid_mask_mmhub = 0xffffff00;
122b04c1d64SJack Xiao adev->mes.vmid_mask_gfxhub = 0xffffff00;
123b04c1d64SJack Xiao
124b04c1d64SJack Xiao for (i = 0; i < AMDGPU_MES_MAX_COMPUTE_PIPES; i++) {
125b04c1d64SJack Xiao /* use only 1st MEC pipes */
126b04c1d64SJack Xiao if (i >= 4)
127b04c1d64SJack Xiao continue;
128b04c1d64SJack Xiao adev->mes.compute_hqd_mask[i] = 0xc;
129b04c1d64SJack Xiao }
130b04c1d64SJack Xiao
131b04c1d64SJack Xiao for (i = 0; i < AMDGPU_MES_MAX_GFX_PIPES; i++)
132b04c1d64SJack Xiao adev->mes.gfx_hqd_mask[i] = i ? 0 : 0xfffffffe;
133b04c1d64SJack Xiao
13418ee4ce6SJack Xiao for (i = 0; i < AMDGPU_MES_MAX_SDMA_PIPES; i++) {
13518ee4ce6SJack Xiao if (adev->ip_versions[SDMA0_HWIP][0] < IP_VERSION(6, 0, 0))
136b04c1d64SJack Xiao adev->mes.sdma_hqd_mask[i] = i ? 0 : 0x3fc;
1370af4ed0cSYifan Zhang /* zero sdma_hqd_mask for non-existent engine */
1380af4ed0cSYifan Zhang else if (adev->sdma.num_instances == 1)
1390af4ed0cSYifan Zhang adev->mes.sdma_hqd_mask[i] = i ? 0 : 0xfc;
14018ee4ce6SJack Xiao else
14118ee4ce6SJack Xiao adev->mes.sdma_hqd_mask[i] = 0xfc;
14218ee4ce6SJack Xiao }
143b04c1d64SJack Xiao
144b04c1d64SJack Xiao r = amdgpu_device_wb_get(adev, &adev->mes.sch_ctx_offs);
145b04c1d64SJack Xiao if (r) {
146b04c1d64SJack Xiao dev_err(adev->dev,
147b04c1d64SJack Xiao "(%d) ring trail_fence_offs wb alloc failed\n", r);
148b04c1d64SJack Xiao goto error_ids;
149b04c1d64SJack Xiao }
150b04c1d64SJack Xiao adev->mes.sch_ctx_gpu_addr =
151b04c1d64SJack Xiao adev->wb.gpu_addr + (adev->mes.sch_ctx_offs * 4);
152b04c1d64SJack Xiao adev->mes.sch_ctx_ptr =
153b04c1d64SJack Xiao (uint64_t *)&adev->wb.wb[adev->mes.sch_ctx_offs];
154b04c1d64SJack Xiao
1550bf478f0SJack Xiao r = amdgpu_device_wb_get(adev, &adev->mes.query_status_fence_offs);
1560bf478f0SJack Xiao if (r) {
157adc0e6abSJack Xiao amdgpu_device_wb_free(adev, adev->mes.sch_ctx_offs);
1580bf478f0SJack Xiao dev_err(adev->dev,
1590bf478f0SJack Xiao "(%d) query_status_fence_offs wb alloc failed\n", r);
160adc0e6abSJack Xiao goto error_ids;
1610bf478f0SJack Xiao }
1620bf478f0SJack Xiao adev->mes.query_status_fence_gpu_addr =
1630bf478f0SJack Xiao adev->wb.gpu_addr + (adev->mes.query_status_fence_offs * 4);
1640bf478f0SJack Xiao adev->mes.query_status_fence_ptr =
1650bf478f0SJack Xiao (uint64_t *)&adev->wb.wb[adev->mes.query_status_fence_offs];
1660bf478f0SJack Xiao
167adc0e6abSJack Xiao r = amdgpu_device_wb_get(adev, &adev->mes.read_val_offs);
168adc0e6abSJack Xiao if (r) {
169adc0e6abSJack Xiao amdgpu_device_wb_free(adev, adev->mes.sch_ctx_offs);
170adc0e6abSJack Xiao amdgpu_device_wb_free(adev, adev->mes.query_status_fence_offs);
171adc0e6abSJack Xiao dev_err(adev->dev,
172adc0e6abSJack Xiao "(%d) read_val_offs alloc failed\n", r);
173adc0e6abSJack Xiao goto error_ids;
174adc0e6abSJack Xiao }
175adc0e6abSJack Xiao adev->mes.read_val_gpu_addr =
176adc0e6abSJack Xiao adev->wb.gpu_addr + (adev->mes.read_val_offs * 4);
177adc0e6abSJack Xiao adev->mes.read_val_ptr =
178adc0e6abSJack Xiao (uint32_t *)&adev->wb.wb[adev->mes.read_val_offs];
179adc0e6abSJack Xiao
180b04c1d64SJack Xiao r = amdgpu_mes_doorbell_init(adev);
181b04c1d64SJack Xiao if (r)
182b04c1d64SJack Xiao goto error;
183b04c1d64SJack Xiao
184b04c1d64SJack Xiao return 0;
185b04c1d64SJack Xiao
186b04c1d64SJack Xiao error:
187b04c1d64SJack Xiao amdgpu_device_wb_free(adev, adev->mes.sch_ctx_offs);
188adc0e6abSJack Xiao amdgpu_device_wb_free(adev, adev->mes.query_status_fence_offs);
189adc0e6abSJack Xiao amdgpu_device_wb_free(adev, adev->mes.read_val_offs);
190b04c1d64SJack Xiao error_ids:
191b04c1d64SJack Xiao idr_destroy(&adev->mes.pasid_idr);
192b04c1d64SJack Xiao idr_destroy(&adev->mes.gang_id_idr);
193b04c1d64SJack Xiao idr_destroy(&adev->mes.queue_id_idr);
194b04c1d64SJack Xiao ida_destroy(&adev->mes.doorbell_ida);
19518ee4ce6SJack Xiao mutex_destroy(&adev->mes.mutex_hidden);
196b04c1d64SJack Xiao return r;
197b04c1d64SJack Xiao }
198b04c1d64SJack Xiao
amdgpu_mes_fini(struct amdgpu_device * adev)199b04c1d64SJack Xiao void amdgpu_mes_fini(struct amdgpu_device *adev)
200b04c1d64SJack Xiao {
201b04c1d64SJack Xiao amdgpu_device_wb_free(adev, adev->mes.sch_ctx_offs);
202adc0e6abSJack Xiao amdgpu_device_wb_free(adev, adev->mes.query_status_fence_offs);
203adc0e6abSJack Xiao amdgpu_device_wb_free(adev, adev->mes.read_val_offs);
204e3cbb1f4SShashank Sharma amdgpu_mes_doorbell_free(adev);
205b04c1d64SJack Xiao
206b04c1d64SJack Xiao idr_destroy(&adev->mes.pasid_idr);
207b04c1d64SJack Xiao idr_destroy(&adev->mes.gang_id_idr);
208b04c1d64SJack Xiao idr_destroy(&adev->mes.queue_id_idr);
209b04c1d64SJack Xiao ida_destroy(&adev->mes.doorbell_ida);
21018ee4ce6SJack Xiao mutex_destroy(&adev->mes.mutex_hidden);
21118ee4ce6SJack Xiao }
21218ee4ce6SJack Xiao
amdgpu_mes_queue_free_mqd(struct amdgpu_mes_queue * q)21318ee4ce6SJack Xiao static void amdgpu_mes_queue_free_mqd(struct amdgpu_mes_queue *q)
21418ee4ce6SJack Xiao {
21518ee4ce6SJack Xiao amdgpu_bo_free_kernel(&q->mqd_obj,
21618ee4ce6SJack Xiao &q->mqd_gpu_addr,
21718ee4ce6SJack Xiao &q->mqd_cpu_ptr);
218b04c1d64SJack Xiao }
21948dcd2b7SJack Xiao
amdgpu_mes_create_process(struct amdgpu_device * adev,int pasid,struct amdgpu_vm * vm)22048dcd2b7SJack Xiao int amdgpu_mes_create_process(struct amdgpu_device *adev, int pasid,
22148dcd2b7SJack Xiao struct amdgpu_vm *vm)
22248dcd2b7SJack Xiao {
22348dcd2b7SJack Xiao struct amdgpu_mes_process *process;
22448dcd2b7SJack Xiao int r;
22548dcd2b7SJack Xiao
22648dcd2b7SJack Xiao /* allocate the mes process buffer */
22748dcd2b7SJack Xiao process = kzalloc(sizeof(struct amdgpu_mes_process), GFP_KERNEL);
22848dcd2b7SJack Xiao if (!process) {
22948dcd2b7SJack Xiao DRM_ERROR("no more memory to create mes process\n");
23048dcd2b7SJack Xiao return -ENOMEM;
23148dcd2b7SJack Xiao }
23248dcd2b7SJack Xiao
23348dcd2b7SJack Xiao /* allocate the process context bo and map it */
23448dcd2b7SJack Xiao r = amdgpu_bo_create_kernel(adev, AMDGPU_MES_PROC_CTX_SIZE, PAGE_SIZE,
23548dcd2b7SJack Xiao AMDGPU_GEM_DOMAIN_GTT,
23648dcd2b7SJack Xiao &process->proc_ctx_bo,
23748dcd2b7SJack Xiao &process->proc_ctx_gpu_addr,
23848dcd2b7SJack Xiao &process->proc_ctx_cpu_ptr);
23948dcd2b7SJack Xiao if (r) {
24048dcd2b7SJack Xiao DRM_ERROR("failed to allocate process context bo\n");
24118ee4ce6SJack Xiao goto clean_up_memory;
24248dcd2b7SJack Xiao }
24348dcd2b7SJack Xiao memset(process->proc_ctx_cpu_ptr, 0, AMDGPU_MES_PROC_CTX_SIZE);
24448dcd2b7SJack Xiao
24518ee4ce6SJack Xiao /*
24618ee4ce6SJack Xiao * Avoid taking any other locks under MES lock to avoid circular
24718ee4ce6SJack Xiao * lock dependencies.
24818ee4ce6SJack Xiao */
24918ee4ce6SJack Xiao amdgpu_mes_lock(&adev->mes);
25018ee4ce6SJack Xiao
25118ee4ce6SJack Xiao /* add the mes process to idr list */
25218ee4ce6SJack Xiao r = idr_alloc(&adev->mes.pasid_idr, process, pasid, pasid + 1,
25318ee4ce6SJack Xiao GFP_KERNEL);
25418ee4ce6SJack Xiao if (r < 0) {
25518ee4ce6SJack Xiao DRM_ERROR("failed to lock pasid=%d\n", pasid);
25618ee4ce6SJack Xiao goto clean_up_ctx;
25718ee4ce6SJack Xiao }
25818ee4ce6SJack Xiao
25948dcd2b7SJack Xiao INIT_LIST_HEAD(&process->gang_list);
26048dcd2b7SJack Xiao process->vm = vm;
26148dcd2b7SJack Xiao process->pasid = pasid;
26248dcd2b7SJack Xiao process->process_quantum = adev->mes.default_process_quantum;
26348dcd2b7SJack Xiao process->pd_gpu_addr = amdgpu_bo_gpu_offset(vm->root.bo);
26448dcd2b7SJack Xiao
26518ee4ce6SJack Xiao amdgpu_mes_unlock(&adev->mes);
26648dcd2b7SJack Xiao return 0;
26748dcd2b7SJack Xiao
26848dcd2b7SJack Xiao clean_up_ctx:
269664c3b03SShashank Sharma amdgpu_mes_unlock(&adev->mes);
27048dcd2b7SJack Xiao amdgpu_bo_free_kernel(&process->proc_ctx_bo,
27148dcd2b7SJack Xiao &process->proc_ctx_gpu_addr,
27248dcd2b7SJack Xiao &process->proc_ctx_cpu_ptr);
27348dcd2b7SJack Xiao clean_up_memory:
27448dcd2b7SJack Xiao kfree(process);
27548dcd2b7SJack Xiao return r;
27648dcd2b7SJack Xiao }
277063a38d6SJack Xiao
amdgpu_mes_destroy_process(struct amdgpu_device * adev,int pasid)278063a38d6SJack Xiao void amdgpu_mes_destroy_process(struct amdgpu_device *adev, int pasid)
279063a38d6SJack Xiao {
280063a38d6SJack Xiao struct amdgpu_mes_process *process;
281063a38d6SJack Xiao struct amdgpu_mes_gang *gang, *tmp1;
282063a38d6SJack Xiao struct amdgpu_mes_queue *queue, *tmp2;
283063a38d6SJack Xiao struct mes_remove_queue_input queue_input;
284063a38d6SJack Xiao unsigned long flags;
285063a38d6SJack Xiao int r;
286063a38d6SJack Xiao
28718ee4ce6SJack Xiao /*
28818ee4ce6SJack Xiao * Avoid taking any other locks under MES lock to avoid circular
28918ee4ce6SJack Xiao * lock dependencies.
29018ee4ce6SJack Xiao */
29118ee4ce6SJack Xiao amdgpu_mes_lock(&adev->mes);
292063a38d6SJack Xiao
293063a38d6SJack Xiao process = idr_find(&adev->mes.pasid_idr, pasid);
294063a38d6SJack Xiao if (!process) {
295063a38d6SJack Xiao DRM_WARN("pasid %d doesn't exist\n", pasid);
29618ee4ce6SJack Xiao amdgpu_mes_unlock(&adev->mes);
297063a38d6SJack Xiao return;
298063a38d6SJack Xiao }
299063a38d6SJack Xiao
30018ee4ce6SJack Xiao /* Remove all queues from hardware */
301063a38d6SJack Xiao list_for_each_entry_safe(gang, tmp1, &process->gang_list, list) {
302063a38d6SJack Xiao list_for_each_entry_safe(queue, tmp2, &gang->queue_list, list) {
303063a38d6SJack Xiao spin_lock_irqsave(&adev->mes.queue_id_lock, flags);
304063a38d6SJack Xiao idr_remove(&adev->mes.queue_id_idr, queue->queue_id);
305063a38d6SJack Xiao spin_unlock_irqrestore(&adev->mes.queue_id_lock, flags);
306063a38d6SJack Xiao
307063a38d6SJack Xiao queue_input.doorbell_offset = queue->doorbell_off;
308063a38d6SJack Xiao queue_input.gang_context_addr = gang->gang_ctx_gpu_addr;
309063a38d6SJack Xiao
310063a38d6SJack Xiao r = adev->mes.funcs->remove_hw_queue(&adev->mes,
311063a38d6SJack Xiao &queue_input);
312063a38d6SJack Xiao if (r)
313063a38d6SJack Xiao DRM_WARN("failed to remove hardware queue\n");
314063a38d6SJack Xiao }
315063a38d6SJack Xiao
316063a38d6SJack Xiao idr_remove(&adev->mes.gang_id_idr, gang->gang_id);
31718ee4ce6SJack Xiao }
31818ee4ce6SJack Xiao
31918ee4ce6SJack Xiao idr_remove(&adev->mes.pasid_idr, pasid);
32018ee4ce6SJack Xiao amdgpu_mes_unlock(&adev->mes);
32118ee4ce6SJack Xiao
32218ee4ce6SJack Xiao /* free all memory allocated by the process */
32318ee4ce6SJack Xiao list_for_each_entry_safe(gang, tmp1, &process->gang_list, list) {
32418ee4ce6SJack Xiao /* free all queues in the gang */
32518ee4ce6SJack Xiao list_for_each_entry_safe(queue, tmp2, &gang->queue_list, list) {
32618ee4ce6SJack Xiao amdgpu_mes_queue_free_mqd(queue);
32718ee4ce6SJack Xiao list_del(&queue->list);
32818ee4ce6SJack Xiao kfree(queue);
32918ee4ce6SJack Xiao }
330063a38d6SJack Xiao amdgpu_bo_free_kernel(&gang->gang_ctx_bo,
331063a38d6SJack Xiao &gang->gang_ctx_gpu_addr,
332063a38d6SJack Xiao &gang->gang_ctx_cpu_ptr);
333063a38d6SJack Xiao list_del(&gang->list);
334063a38d6SJack Xiao kfree(gang);
33518ee4ce6SJack Xiao
336063a38d6SJack Xiao }
337063a38d6SJack Xiao amdgpu_bo_free_kernel(&process->proc_ctx_bo,
338063a38d6SJack Xiao &process->proc_ctx_gpu_addr,
339063a38d6SJack Xiao &process->proc_ctx_cpu_ptr);
340063a38d6SJack Xiao kfree(process);
341063a38d6SJack Xiao }
3425d0f619fSJack Xiao
amdgpu_mes_add_gang(struct amdgpu_device * adev,int pasid,struct amdgpu_mes_gang_properties * gprops,int * gang_id)3435d0f619fSJack Xiao int amdgpu_mes_add_gang(struct amdgpu_device *adev, int pasid,
3445d0f619fSJack Xiao struct amdgpu_mes_gang_properties *gprops,
3455d0f619fSJack Xiao int *gang_id)
3465d0f619fSJack Xiao {
3475d0f619fSJack Xiao struct amdgpu_mes_process *process;
3485d0f619fSJack Xiao struct amdgpu_mes_gang *gang;
3495d0f619fSJack Xiao int r;
3505d0f619fSJack Xiao
3515d0f619fSJack Xiao /* allocate the mes gang buffer */
3525d0f619fSJack Xiao gang = kzalloc(sizeof(struct amdgpu_mes_gang), GFP_KERNEL);
3535d0f619fSJack Xiao if (!gang) {
3545d0f619fSJack Xiao return -ENOMEM;
3555d0f619fSJack Xiao }
3565d0f619fSJack Xiao
3575d0f619fSJack Xiao /* allocate the gang context bo and map it to cpu space */
3585d0f619fSJack Xiao r = amdgpu_bo_create_kernel(adev, AMDGPU_MES_GANG_CTX_SIZE, PAGE_SIZE,
3595d0f619fSJack Xiao AMDGPU_GEM_DOMAIN_GTT,
3605d0f619fSJack Xiao &gang->gang_ctx_bo,
3615d0f619fSJack Xiao &gang->gang_ctx_gpu_addr,
3625d0f619fSJack Xiao &gang->gang_ctx_cpu_ptr);
3635d0f619fSJack Xiao if (r) {
3645d0f619fSJack Xiao DRM_ERROR("failed to allocate process context bo\n");
36518ee4ce6SJack Xiao goto clean_up_mem;
3665d0f619fSJack Xiao }
3675d0f619fSJack Xiao memset(gang->gang_ctx_cpu_ptr, 0, AMDGPU_MES_GANG_CTX_SIZE);
3685d0f619fSJack Xiao
36918ee4ce6SJack Xiao /*
37018ee4ce6SJack Xiao * Avoid taking any other locks under MES lock to avoid circular
37118ee4ce6SJack Xiao * lock dependencies.
37218ee4ce6SJack Xiao */
37318ee4ce6SJack Xiao amdgpu_mes_lock(&adev->mes);
37418ee4ce6SJack Xiao
37518ee4ce6SJack Xiao process = idr_find(&adev->mes.pasid_idr, pasid);
37618ee4ce6SJack Xiao if (!process) {
37718ee4ce6SJack Xiao DRM_ERROR("pasid %d doesn't exist\n", pasid);
37818ee4ce6SJack Xiao r = -EINVAL;
37918ee4ce6SJack Xiao goto clean_up_ctx;
38018ee4ce6SJack Xiao }
38118ee4ce6SJack Xiao
38218ee4ce6SJack Xiao /* add the mes gang to idr list */
38318ee4ce6SJack Xiao r = idr_alloc(&adev->mes.gang_id_idr, gang, 1, 0,
38418ee4ce6SJack Xiao GFP_KERNEL);
38518ee4ce6SJack Xiao if (r < 0) {
38618ee4ce6SJack Xiao DRM_ERROR("failed to allocate idr for gang\n");
38718ee4ce6SJack Xiao goto clean_up_ctx;
38818ee4ce6SJack Xiao }
38918ee4ce6SJack Xiao
39018ee4ce6SJack Xiao gang->gang_id = r;
39118ee4ce6SJack Xiao *gang_id = r;
39218ee4ce6SJack Xiao
3935d0f619fSJack Xiao INIT_LIST_HEAD(&gang->queue_list);
3945d0f619fSJack Xiao gang->process = process;
3955d0f619fSJack Xiao gang->priority = gprops->priority;
3965d0f619fSJack Xiao gang->gang_quantum = gprops->gang_quantum ?
3975d0f619fSJack Xiao gprops->gang_quantum : adev->mes.default_gang_quantum;
3985d0f619fSJack Xiao gang->global_priority_level = gprops->global_priority_level;
3995d0f619fSJack Xiao gang->inprocess_gang_priority = gprops->inprocess_gang_priority;
4005d0f619fSJack Xiao list_add_tail(&gang->list, &process->gang_list);
4015d0f619fSJack Xiao
40218ee4ce6SJack Xiao amdgpu_mes_unlock(&adev->mes);
4035d0f619fSJack Xiao return 0;
4045d0f619fSJack Xiao
40518ee4ce6SJack Xiao clean_up_ctx:
40618ee4ce6SJack Xiao amdgpu_mes_unlock(&adev->mes);
40718ee4ce6SJack Xiao amdgpu_bo_free_kernel(&gang->gang_ctx_bo,
40818ee4ce6SJack Xiao &gang->gang_ctx_gpu_addr,
40918ee4ce6SJack Xiao &gang->gang_ctx_cpu_ptr);
41018ee4ce6SJack Xiao clean_up_mem:
4115d0f619fSJack Xiao kfree(gang);
4125d0f619fSJack Xiao return r;
4135d0f619fSJack Xiao }
414b0306e58SJack Xiao
amdgpu_mes_remove_gang(struct amdgpu_device * adev,int gang_id)415b0306e58SJack Xiao int amdgpu_mes_remove_gang(struct amdgpu_device *adev, int gang_id)
416b0306e58SJack Xiao {
417b0306e58SJack Xiao struct amdgpu_mes_gang *gang;
418b0306e58SJack Xiao
41918ee4ce6SJack Xiao /*
42018ee4ce6SJack Xiao * Avoid taking any other locks under MES lock to avoid circular
42118ee4ce6SJack Xiao * lock dependencies.
42218ee4ce6SJack Xiao */
42318ee4ce6SJack Xiao amdgpu_mes_lock(&adev->mes);
424b0306e58SJack Xiao
425b0306e58SJack Xiao gang = idr_find(&adev->mes.gang_id_idr, gang_id);
426b0306e58SJack Xiao if (!gang) {
427b0306e58SJack Xiao DRM_ERROR("gang id %d doesn't exist\n", gang_id);
42818ee4ce6SJack Xiao amdgpu_mes_unlock(&adev->mes);
429b0306e58SJack Xiao return -EINVAL;
430b0306e58SJack Xiao }
431b0306e58SJack Xiao
432b0306e58SJack Xiao if (!list_empty(&gang->queue_list)) {
433b0306e58SJack Xiao DRM_ERROR("queue list is not empty\n");
43418ee4ce6SJack Xiao amdgpu_mes_unlock(&adev->mes);
435b0306e58SJack Xiao return -EBUSY;
436b0306e58SJack Xiao }
437b0306e58SJack Xiao
438b0306e58SJack Xiao idr_remove(&adev->mes.gang_id_idr, gang->gang_id);
43918ee4ce6SJack Xiao list_del(&gang->list);
44018ee4ce6SJack Xiao amdgpu_mes_unlock(&adev->mes);
44118ee4ce6SJack Xiao
442b0306e58SJack Xiao amdgpu_bo_free_kernel(&gang->gang_ctx_bo,
443b0306e58SJack Xiao &gang->gang_ctx_gpu_addr,
444b0306e58SJack Xiao &gang->gang_ctx_cpu_ptr);
44518ee4ce6SJack Xiao
446b0306e58SJack Xiao kfree(gang);
447b0306e58SJack Xiao
448b0306e58SJack Xiao return 0;
449b0306e58SJack Xiao }
450c8bb1057SJack Xiao
amdgpu_mes_suspend(struct amdgpu_device * adev)451c8bb1057SJack Xiao int amdgpu_mes_suspend(struct amdgpu_device *adev)
452c8bb1057SJack Xiao {
453c8bb1057SJack Xiao struct idr *idp;
454c8bb1057SJack Xiao struct amdgpu_mes_process *process;
455c8bb1057SJack Xiao struct amdgpu_mes_gang *gang;
456c8bb1057SJack Xiao struct mes_suspend_gang_input input;
457c8bb1057SJack Xiao int r, pasid;
458c8bb1057SJack Xiao
45918ee4ce6SJack Xiao /*
46018ee4ce6SJack Xiao * Avoid taking any other locks under MES lock to avoid circular
46118ee4ce6SJack Xiao * lock dependencies.
46218ee4ce6SJack Xiao */
46318ee4ce6SJack Xiao amdgpu_mes_lock(&adev->mes);
464c8bb1057SJack Xiao
465c8bb1057SJack Xiao idp = &adev->mes.pasid_idr;
466c8bb1057SJack Xiao
467c8bb1057SJack Xiao idr_for_each_entry(idp, process, pasid) {
468c8bb1057SJack Xiao list_for_each_entry(gang, &process->gang_list, list) {
469c8bb1057SJack Xiao r = adev->mes.funcs->suspend_gang(&adev->mes, &input);
470c8bb1057SJack Xiao if (r)
471c8bb1057SJack Xiao DRM_ERROR("failed to suspend pasid %d gangid %d",
472c8bb1057SJack Xiao pasid, gang->gang_id);
473c8bb1057SJack Xiao }
474c8bb1057SJack Xiao }
475c8bb1057SJack Xiao
47618ee4ce6SJack Xiao amdgpu_mes_unlock(&adev->mes);
477c8bb1057SJack Xiao return 0;
478c8bb1057SJack Xiao }
479ea756bd5SJack Xiao
amdgpu_mes_resume(struct amdgpu_device * adev)480ea756bd5SJack Xiao int amdgpu_mes_resume(struct amdgpu_device *adev)
481ea756bd5SJack Xiao {
482ea756bd5SJack Xiao struct idr *idp;
483ea756bd5SJack Xiao struct amdgpu_mes_process *process;
484ea756bd5SJack Xiao struct amdgpu_mes_gang *gang;
485ea756bd5SJack Xiao struct mes_resume_gang_input input;
486ea756bd5SJack Xiao int r, pasid;
487ea756bd5SJack Xiao
48818ee4ce6SJack Xiao /*
48918ee4ce6SJack Xiao * Avoid taking any other locks under MES lock to avoid circular
49018ee4ce6SJack Xiao * lock dependencies.
49118ee4ce6SJack Xiao */
49218ee4ce6SJack Xiao amdgpu_mes_lock(&adev->mes);
493ea756bd5SJack Xiao
494ea756bd5SJack Xiao idp = &adev->mes.pasid_idr;
495ea756bd5SJack Xiao
496ea756bd5SJack Xiao idr_for_each_entry(idp, process, pasid) {
497ea756bd5SJack Xiao list_for_each_entry(gang, &process->gang_list, list) {
498ea756bd5SJack Xiao r = adev->mes.funcs->resume_gang(&adev->mes, &input);
499ea756bd5SJack Xiao if (r)
500ea756bd5SJack Xiao DRM_ERROR("failed to resume pasid %d gangid %d",
501ea756bd5SJack Xiao pasid, gang->gang_id);
502ea756bd5SJack Xiao }
503ea756bd5SJack Xiao }
504ea756bd5SJack Xiao
50518ee4ce6SJack Xiao amdgpu_mes_unlock(&adev->mes);
506ea756bd5SJack Xiao return 0;
507ea756bd5SJack Xiao }
5085fa963d0SJack Xiao
amdgpu_mes_queue_alloc_mqd(struct amdgpu_device * adev,struct amdgpu_mes_queue * q,struct amdgpu_mes_queue_properties * p)50918ee4ce6SJack Xiao static int amdgpu_mes_queue_alloc_mqd(struct amdgpu_device *adev,
5105fa963d0SJack Xiao struct amdgpu_mes_queue *q,
5115fa963d0SJack Xiao struct amdgpu_mes_queue_properties *p)
5125fa963d0SJack Xiao {
5135fa963d0SJack Xiao struct amdgpu_mqd *mqd_mgr = &adev->mqds[p->queue_type];
5145fa963d0SJack Xiao u32 mqd_size = mqd_mgr->mqd_size;
5155fa963d0SJack Xiao int r;
5165fa963d0SJack Xiao
5175fa963d0SJack Xiao r = amdgpu_bo_create_kernel(adev, mqd_size, PAGE_SIZE,
5185fa963d0SJack Xiao AMDGPU_GEM_DOMAIN_GTT,
5195fa963d0SJack Xiao &q->mqd_obj,
5205fa963d0SJack Xiao &q->mqd_gpu_addr, &q->mqd_cpu_ptr);
5215fa963d0SJack Xiao if (r) {
5225fa963d0SJack Xiao dev_warn(adev->dev, "failed to create queue mqd bo (%d)", r);
5235fa963d0SJack Xiao return r;
5245fa963d0SJack Xiao }
5255fa963d0SJack Xiao memset(q->mqd_cpu_ptr, 0, mqd_size);
5265fa963d0SJack Xiao
52718ee4ce6SJack Xiao r = amdgpu_bo_reserve(q->mqd_obj, false);
52818ee4ce6SJack Xiao if (unlikely(r != 0))
52918ee4ce6SJack Xiao goto clean_up;
53018ee4ce6SJack Xiao
53118ee4ce6SJack Xiao return 0;
53218ee4ce6SJack Xiao
53318ee4ce6SJack Xiao clean_up:
53418ee4ce6SJack Xiao amdgpu_bo_free_kernel(&q->mqd_obj,
53518ee4ce6SJack Xiao &q->mqd_gpu_addr,
53618ee4ce6SJack Xiao &q->mqd_cpu_ptr);
53718ee4ce6SJack Xiao return r;
53818ee4ce6SJack Xiao }
53918ee4ce6SJack Xiao
amdgpu_mes_queue_init_mqd(struct amdgpu_device * adev,struct amdgpu_mes_queue * q,struct amdgpu_mes_queue_properties * p)54018ee4ce6SJack Xiao static void amdgpu_mes_queue_init_mqd(struct amdgpu_device *adev,
54118ee4ce6SJack Xiao struct amdgpu_mes_queue *q,
54218ee4ce6SJack Xiao struct amdgpu_mes_queue_properties *p)
54318ee4ce6SJack Xiao {
54418ee4ce6SJack Xiao struct amdgpu_mqd *mqd_mgr = &adev->mqds[p->queue_type];
54518ee4ce6SJack Xiao struct amdgpu_mqd_prop mqd_prop = {0};
54618ee4ce6SJack Xiao
5475fa963d0SJack Xiao mqd_prop.mqd_gpu_addr = q->mqd_gpu_addr;
5485fa963d0SJack Xiao mqd_prop.hqd_base_gpu_addr = p->hqd_base_gpu_addr;
5495fa963d0SJack Xiao mqd_prop.rptr_gpu_addr = p->rptr_gpu_addr;
5505fa963d0SJack Xiao mqd_prop.wptr_gpu_addr = p->wptr_gpu_addr;
5515fa963d0SJack Xiao mqd_prop.queue_size = p->queue_size;
5525fa963d0SJack Xiao mqd_prop.use_doorbell = true;
5535fa963d0SJack Xiao mqd_prop.doorbell_index = p->doorbell_off;
5545fa963d0SJack Xiao mqd_prop.eop_gpu_addr = p->eop_gpu_addr;
5555fa963d0SJack Xiao mqd_prop.hqd_pipe_priority = p->hqd_pipe_priority;
5565fa963d0SJack Xiao mqd_prop.hqd_queue_priority = p->hqd_queue_priority;
5575fa963d0SJack Xiao mqd_prop.hqd_active = false;
5585fa963d0SJack Xiao
559553d2683STim Huang if (p->queue_type == AMDGPU_RING_TYPE_GFX ||
560553d2683STim Huang p->queue_type == AMDGPU_RING_TYPE_COMPUTE) {
561553d2683STim Huang mutex_lock(&adev->srbm_mutex);
562553d2683STim Huang amdgpu_gfx_select_me_pipe_q(adev, p->ring->me, p->ring->pipe, 0, 0, 0);
563553d2683STim Huang }
564553d2683STim Huang
5655fa963d0SJack Xiao mqd_mgr->init_mqd(adev, q->mqd_cpu_ptr, &mqd_prop);
5665fa963d0SJack Xiao
567553d2683STim Huang if (p->queue_type == AMDGPU_RING_TYPE_GFX ||
568553d2683STim Huang p->queue_type == AMDGPU_RING_TYPE_COMPUTE) {
569553d2683STim Huang amdgpu_gfx_select_me_pipe_q(adev, 0, 0, 0, 0, 0);
570553d2683STim Huang mutex_unlock(&adev->srbm_mutex);
571553d2683STim Huang }
572553d2683STim Huang
5735fa963d0SJack Xiao amdgpu_bo_unreserve(q->mqd_obj);
5745fa963d0SJack Xiao }
575be5609deSJack Xiao
amdgpu_mes_add_hw_queue(struct amdgpu_device * adev,int gang_id,struct amdgpu_mes_queue_properties * qprops,int * queue_id)576be5609deSJack Xiao int amdgpu_mes_add_hw_queue(struct amdgpu_device *adev, int gang_id,
577be5609deSJack Xiao struct amdgpu_mes_queue_properties *qprops,
578be5609deSJack Xiao int *queue_id)
579be5609deSJack Xiao {
580be5609deSJack Xiao struct amdgpu_mes_queue *queue;
581be5609deSJack Xiao struct amdgpu_mes_gang *gang;
582be5609deSJack Xiao struct mes_add_queue_input queue_input;
583be5609deSJack Xiao unsigned long flags;
584be5609deSJack Xiao int r;
585be5609deSJack Xiao
5867a1c5c67SJonathan Kim memset(&queue_input, 0, sizeof(struct mes_add_queue_input));
5877a1c5c67SJonathan Kim
58818ee4ce6SJack Xiao /* allocate the mes queue buffer */
58918ee4ce6SJack Xiao queue = kzalloc(sizeof(struct amdgpu_mes_queue), GFP_KERNEL);
59018ee4ce6SJack Xiao if (!queue) {
59118ee4ce6SJack Xiao DRM_ERROR("Failed to allocate memory for queue\n");
59218ee4ce6SJack Xiao return -ENOMEM;
59318ee4ce6SJack Xiao }
59418ee4ce6SJack Xiao
59518ee4ce6SJack Xiao /* Allocate the queue mqd */
59618ee4ce6SJack Xiao r = amdgpu_mes_queue_alloc_mqd(adev, queue, qprops);
59718ee4ce6SJack Xiao if (r)
59818ee4ce6SJack Xiao goto clean_up_memory;
59918ee4ce6SJack Xiao
60018ee4ce6SJack Xiao /*
60118ee4ce6SJack Xiao * Avoid taking any other locks under MES lock to avoid circular
60218ee4ce6SJack Xiao * lock dependencies.
60318ee4ce6SJack Xiao */
60418ee4ce6SJack Xiao amdgpu_mes_lock(&adev->mes);
605be5609deSJack Xiao
606be5609deSJack Xiao gang = idr_find(&adev->mes.gang_id_idr, gang_id);
607be5609deSJack Xiao if (!gang) {
608be5609deSJack Xiao DRM_ERROR("gang id %d doesn't exist\n", gang_id);
60918ee4ce6SJack Xiao r = -EINVAL;
61018ee4ce6SJack Xiao goto clean_up_mqd;
611be5609deSJack Xiao }
612be5609deSJack Xiao
613be5609deSJack Xiao /* add the mes gang to idr list */
614be5609deSJack Xiao spin_lock_irqsave(&adev->mes.queue_id_lock, flags);
615be5609deSJack Xiao r = idr_alloc(&adev->mes.queue_id_idr, queue, 1, 0,
616be5609deSJack Xiao GFP_ATOMIC);
617be5609deSJack Xiao if (r < 0) {
618be5609deSJack Xiao spin_unlock_irqrestore(&adev->mes.queue_id_lock, flags);
61918ee4ce6SJack Xiao goto clean_up_mqd;
620be5609deSJack Xiao }
621be5609deSJack Xiao spin_unlock_irqrestore(&adev->mes.queue_id_lock, flags);
622be5609deSJack Xiao *queue_id = queue->queue_id = r;
623be5609deSJack Xiao
624be5609deSJack Xiao /* allocate a doorbell index for the queue */
625e3cbb1f4SShashank Sharma r = amdgpu_mes_kernel_doorbell_get(adev, gang->process,
626be5609deSJack Xiao qprops->queue_type,
627be5609deSJack Xiao &qprops->doorbell_off);
628be5609deSJack Xiao if (r)
629be5609deSJack Xiao goto clean_up_queue_id;
630be5609deSJack Xiao
631be5609deSJack Xiao /* initialize the queue mqd */
63218ee4ce6SJack Xiao amdgpu_mes_queue_init_mqd(adev, queue, qprops);
633be5609deSJack Xiao
634be5609deSJack Xiao /* add hw queue to mes */
635be5609deSJack Xiao queue_input.process_id = gang->process->pasid;
63618ee4ce6SJack Xiao
63718ee4ce6SJack Xiao queue_input.page_table_base_addr =
63818ee4ce6SJack Xiao adev->vm_manager.vram_base_offset + gang->process->pd_gpu_addr -
63918ee4ce6SJack Xiao adev->gmc.vram_start;
64018ee4ce6SJack Xiao
641be5609deSJack Xiao queue_input.process_va_start = 0;
642be5609deSJack Xiao queue_input.process_va_end =
643be5609deSJack Xiao (adev->vm_manager.max_pfn - 1) << AMDGPU_GPU_PAGE_SHIFT;
644be5609deSJack Xiao queue_input.process_quantum = gang->process->process_quantum;
645be5609deSJack Xiao queue_input.process_context_addr = gang->process->proc_ctx_gpu_addr;
646be5609deSJack Xiao queue_input.gang_quantum = gang->gang_quantum;
647be5609deSJack Xiao queue_input.gang_context_addr = gang->gang_ctx_gpu_addr;
648be5609deSJack Xiao queue_input.inprocess_gang_priority = gang->inprocess_gang_priority;
649be5609deSJack Xiao queue_input.gang_global_priority_level = gang->global_priority_level;
650be5609deSJack Xiao queue_input.doorbell_offset = qprops->doorbell_off;
651be5609deSJack Xiao queue_input.mqd_addr = queue->mqd_gpu_addr;
652be5609deSJack Xiao queue_input.wptr_addr = qprops->wptr_gpu_addr;
653fe4e9ff9SJack Xiao queue_input.wptr_mc_addr = qprops->wptr_mc_addr;
654be5609deSJack Xiao queue_input.queue_type = qprops->queue_type;
655be5609deSJack Xiao queue_input.paging = qprops->paging;
656a9579956SGraham Sider queue_input.is_kfd_process = 0;
657be5609deSJack Xiao
658be5609deSJack Xiao r = adev->mes.funcs->add_hw_queue(&adev->mes, &queue_input);
659be5609deSJack Xiao if (r) {
660be5609deSJack Xiao DRM_ERROR("failed to add hardware queue to MES, doorbell=0x%llx\n",
661be5609deSJack Xiao qprops->doorbell_off);
66218ee4ce6SJack Xiao goto clean_up_doorbell;
663be5609deSJack Xiao }
664be5609deSJack Xiao
665be5609deSJack Xiao DRM_DEBUG("MES hw queue was added, pasid=%d, gang id=%d, "
666be5609deSJack Xiao "queue type=%d, doorbell=0x%llx\n",
667be5609deSJack Xiao gang->process->pasid, gang_id, qprops->queue_type,
668be5609deSJack Xiao qprops->doorbell_off);
669be5609deSJack Xiao
670be5609deSJack Xiao queue->ring = qprops->ring;
671be5609deSJack Xiao queue->doorbell_off = qprops->doorbell_off;
672be5609deSJack Xiao queue->wptr_gpu_addr = qprops->wptr_gpu_addr;
673be5609deSJack Xiao queue->queue_type = qprops->queue_type;
674be5609deSJack Xiao queue->paging = qprops->paging;
675be5609deSJack Xiao queue->gang = gang;
6762d7a1f71SLe Ma queue->ring->mqd_ptr = queue->mqd_cpu_ptr;
677be5609deSJack Xiao list_add_tail(&queue->list, &gang->queue_list);
678be5609deSJack Xiao
67918ee4ce6SJack Xiao amdgpu_mes_unlock(&adev->mes);
680be5609deSJack Xiao return 0;
681be5609deSJack Xiao
682be5609deSJack Xiao clean_up_doorbell:
683e3cbb1f4SShashank Sharma amdgpu_mes_kernel_doorbell_free(adev, gang->process,
684be5609deSJack Xiao qprops->doorbell_off);
685be5609deSJack Xiao clean_up_queue_id:
686be5609deSJack Xiao spin_lock_irqsave(&adev->mes.queue_id_lock, flags);
687be5609deSJack Xiao idr_remove(&adev->mes.queue_id_idr, queue->queue_id);
688be5609deSJack Xiao spin_unlock_irqrestore(&adev->mes.queue_id_lock, flags);
68918ee4ce6SJack Xiao clean_up_mqd:
69018ee4ce6SJack Xiao amdgpu_mes_unlock(&adev->mes);
69118ee4ce6SJack Xiao amdgpu_mes_queue_free_mqd(queue);
692be5609deSJack Xiao clean_up_memory:
693be5609deSJack Xiao kfree(queue);
694be5609deSJack Xiao return r;
695be5609deSJack Xiao }
696bcc4e1e1SJack Xiao
amdgpu_mes_remove_hw_queue(struct amdgpu_device * adev,int queue_id)697bcc4e1e1SJack Xiao int amdgpu_mes_remove_hw_queue(struct amdgpu_device *adev, int queue_id)
698bcc4e1e1SJack Xiao {
699bcc4e1e1SJack Xiao unsigned long flags;
700bcc4e1e1SJack Xiao struct amdgpu_mes_queue *queue;
701bcc4e1e1SJack Xiao struct amdgpu_mes_gang *gang;
702bcc4e1e1SJack Xiao struct mes_remove_queue_input queue_input;
703bcc4e1e1SJack Xiao int r;
704bcc4e1e1SJack Xiao
70518ee4ce6SJack Xiao /*
70618ee4ce6SJack Xiao * Avoid taking any other locks under MES lock to avoid circular
70718ee4ce6SJack Xiao * lock dependencies.
70818ee4ce6SJack Xiao */
70918ee4ce6SJack Xiao amdgpu_mes_lock(&adev->mes);
710bcc4e1e1SJack Xiao
711bcc4e1e1SJack Xiao /* remove the mes gang from idr list */
712bcc4e1e1SJack Xiao spin_lock_irqsave(&adev->mes.queue_id_lock, flags);
713bcc4e1e1SJack Xiao
714bcc4e1e1SJack Xiao queue = idr_find(&adev->mes.queue_id_idr, queue_id);
715bcc4e1e1SJack Xiao if (!queue) {
716bcc4e1e1SJack Xiao spin_unlock_irqrestore(&adev->mes.queue_id_lock, flags);
71718ee4ce6SJack Xiao amdgpu_mes_unlock(&adev->mes);
718bcc4e1e1SJack Xiao DRM_ERROR("queue id %d doesn't exist\n", queue_id);
719bcc4e1e1SJack Xiao return -EINVAL;
720bcc4e1e1SJack Xiao }
721bcc4e1e1SJack Xiao
722bcc4e1e1SJack Xiao idr_remove(&adev->mes.queue_id_idr, queue_id);
723bcc4e1e1SJack Xiao spin_unlock_irqrestore(&adev->mes.queue_id_lock, flags);
724bcc4e1e1SJack Xiao
725bcc4e1e1SJack Xiao DRM_DEBUG("try to remove queue, doorbell off = 0x%llx\n",
726bcc4e1e1SJack Xiao queue->doorbell_off);
727bcc4e1e1SJack Xiao
728bcc4e1e1SJack Xiao gang = queue->gang;
729bcc4e1e1SJack Xiao queue_input.doorbell_offset = queue->doorbell_off;
730bcc4e1e1SJack Xiao queue_input.gang_context_addr = gang->gang_ctx_gpu_addr;
731bcc4e1e1SJack Xiao
732bcc4e1e1SJack Xiao r = adev->mes.funcs->remove_hw_queue(&adev->mes, &queue_input);
733bcc4e1e1SJack Xiao if (r)
734bcc4e1e1SJack Xiao DRM_ERROR("failed to remove hardware queue, queue id = %d\n",
735bcc4e1e1SJack Xiao queue_id);
736bcc4e1e1SJack Xiao
737bcc4e1e1SJack Xiao list_del(&queue->list);
738e3cbb1f4SShashank Sharma amdgpu_mes_kernel_doorbell_free(adev, gang->process,
739bcc4e1e1SJack Xiao queue->doorbell_off);
74018ee4ce6SJack Xiao amdgpu_mes_unlock(&adev->mes);
74118ee4ce6SJack Xiao
74218ee4ce6SJack Xiao amdgpu_mes_queue_free_mqd(queue);
743bcc4e1e1SJack Xiao kfree(queue);
744bcc4e1e1SJack Xiao return 0;
745bcc4e1e1SJack Xiao }
7461a27aacbSJack Xiao
/*
 * amdgpu_mes_unmap_legacy_queue - ask MES to unmap a legacy queue
 * @adev: amdgpu device
 * @ring: ring whose type, doorbell index, pipe and queue identify the queue
 * @action: unmap action forwarded to the backend
 * @gpu_addr: value forwarded as the trail fence address
 * @seq: value forwarded as the trail fence data
 *
 * Returns the result of the backend's unmap_legacy_queue callback; a
 * non-zero result is also logged.
 */
int amdgpu_mes_unmap_legacy_queue(struct amdgpu_device *adev,
				  struct amdgpu_ring *ring,
				  enum amdgpu_unmap_queues_action action,
				  u64 gpu_addr, u64 seq)
{
	struct mes_unmap_legacy_queue_input input;
	int ret;

	/* queue identification taken from the ring */
	input.queue_type = ring->funcs->type;
	input.pipe_id = ring->pipe;
	input.queue_id = ring->queue;
	input.doorbell_offset = ring->doorbell_index;

	/* what to do and where to signal completion */
	input.action = action;
	input.trail_fence_addr = gpu_addr;
	input.trail_fence_data = seq;

	ret = adev->mes.funcs->unmap_legacy_queue(&adev->mes, &input);
	if (ret)
		DRM_ERROR("failed to unmap legacy queue\n");

	return ret;
}
76918ee4ce6SJack Xiao
/*
 * amdgpu_mes_rreg - read a register through the MES misc_op interface
 * @adev: amdgpu device
 * @reg: register offset to read
 *
 * Submits a MES_MISC_OP_READ_REG request that targets
 * adev->mes.read_val_gpu_addr and, on success, returns the value found
 * at adev->mes.read_val_ptr.
 *
 * Returns the value read, or 0 when the backend has no misc_op callback
 * or the request fails (both cases are logged).
 */
uint32_t amdgpu_mes_rreg(struct amdgpu_device *adev, uint32_t reg)
{
	struct mes_misc_op_input op_input;
	/* same type as the return value: no signed/unsigned round trip */
	uint32_t val = 0;
	int r;

	op_input.op = MES_MISC_OP_READ_REG;
	op_input.read_reg.reg_offset = reg;
	op_input.read_reg.buffer_addr = adev->mes.read_val_gpu_addr;

	if (!adev->mes.funcs->misc_op) {
		DRM_ERROR("mes rreg is not supported!\n");
		return val;
	}

	r = adev->mes.funcs->misc_op(&adev->mes, &op_input);
	if (r)
		DRM_ERROR("failed to read reg (0x%x)\n", reg);
	else
		val = *(adev->mes.read_val_ptr);

	return val;
}
793adc0e6abSJack Xiao
/*
 * amdgpu_mes_wreg - write a register through the MES misc_op interface
 * @adev: amdgpu device
 * @reg: register offset to write
 * @val: value to write
 *
 * Returns the result of the backend's misc_op callback, or -EINVAL when
 * the backend does not provide one.
 */
int amdgpu_mes_wreg(struct amdgpu_device *adev,
		    uint32_t reg, uint32_t val)
{
	struct mes_misc_op_input op_input;
	int ret;

	op_input.op = MES_MISC_OP_WRITE_REG;
	op_input.write_reg.reg_offset = reg;
	op_input.write_reg.reg_value = val;

	if (!adev->mes.funcs->misc_op) {
		DRM_ERROR("mes wreg is not supported!\n");
		return -EINVAL;
	}

	ret = adev->mes.funcs->misc_op(&adev->mes, &op_input);
	if (ret)
		DRM_ERROR("failed to write reg (0x%x)\n", reg);

	return ret;
}
817adc0e6abSJack Xiao
/*
 * amdgpu_mes_reg_write_reg_wait - submit a MES_MISC_OP_WRM_REG_WR_WAIT request
 * @adev: amdgpu device
 * @reg0: forwarded as wrm_reg.reg0
 * @reg1: forwarded as wrm_reg.reg1
 * @ref: forwarded as wrm_reg.ref
 * @mask: forwarded as wrm_reg.mask
 *
 * Returns the result of the backend's misc_op callback, or -EINVAL when
 * the backend does not provide one.
 */
int amdgpu_mes_reg_write_reg_wait(struct amdgpu_device *adev,
				  uint32_t reg0, uint32_t reg1,
				  uint32_t ref, uint32_t mask)
{
	struct mes_misc_op_input op_input;
	int ret;

	op_input.op = MES_MISC_OP_WRM_REG_WR_WAIT;
	op_input.wrm_reg.reg0 = reg0;
	op_input.wrm_reg.reg1 = reg1;
	op_input.wrm_reg.ref = ref;
	op_input.wrm_reg.mask = mask;

	if (!adev->mes.funcs->misc_op) {
		DRM_ERROR("mes reg_write_reg_wait is not supported!\n");
		return -EINVAL;
	}

	ret = adev->mes.funcs->misc_op(&adev->mes, &op_input);
	if (ret)
		DRM_ERROR("failed to reg_write_reg_wait\n");

	return ret;
}
844adc0e6abSJack Xiao
/*
 * amdgpu_mes_reg_wait - submit a MES_MISC_OP_WRM_REG_WAIT request
 * @adev: amdgpu device
 * @reg: forwarded as wrm_reg.reg0
 * @val: forwarded as wrm_reg.ref
 * @mask: forwarded as wrm_reg.mask
 *
 * Returns the result of the backend's misc_op callback, or -EINVAL when
 * the backend does not provide one.
 */
int amdgpu_mes_reg_wait(struct amdgpu_device *adev, uint32_t reg,
			uint32_t val, uint32_t mask)
{
	struct mes_misc_op_input op_input;
	int r;

	op_input.op = MES_MISC_OP_WRM_REG_WAIT;
	op_input.wrm_reg.reg0 = reg;
	op_input.wrm_reg.ref = val;
	op_input.wrm_reg.mask = mask;

	if (!adev->mes.funcs->misc_op) {
		DRM_ERROR("mes reg wait is not supported!\n");
		return -EINVAL;
	}

	r = adev->mes.funcs->misc_op(&adev->mes, &op_input);
	if (r)
		/* name this operation, not amdgpu_mes_reg_write_reg_wait() */
		DRM_ERROR("failed to reg_wait\n");

	return r;
}
869adc0e6abSJack Xiao
/*
 * amdgpu_mes_set_shader_debugger - program shader debugger state through MES
 * @adev: amdgpu device
 * @process_context_addr: process context address the request applies to
 * @spi_gdbg_per_vmid_cntl: value forwarded to the request
 * @tcp_watch_cntl: source array copied into the request's tcp_watch_cntl
 *                  (sizeof the destination field bytes are read)
 * @flags: raw flag word stored in flags.u32all
 * @trap_en: forwarded only when the scheduler API version is at least 14
 *
 * A request with the process_ctx_flush flag set is rejected here;
 * amdgpu_mes_flush_shader_debugger() must be used for that.
 *
 * Returns the result of the backend's misc_op callback, or -EINVAL when
 * misc_op is missing or the flush flag was passed in @flags.
 */
int amdgpu_mes_set_shader_debugger(struct amdgpu_device *adev,
				   uint64_t process_context_addr,
				   uint32_t spi_gdbg_per_vmid_cntl,
				   const uint32_t *tcp_watch_cntl,
				   uint32_t flags,
				   bool trap_en)
{
	struct mes_misc_op_input op_input = {0};
	int ret;

	if (!adev->mes.funcs->misc_op) {
		DRM_ERROR("mes set shader debugger is not supported!\n");
		return -EINVAL;
	}

	op_input.op = MES_MISC_OP_SET_SHADER_DEBUGGER;
	op_input.set_shader_debugger.process_context_addr = process_context_addr;
	op_input.set_shader_debugger.flags.u32all = flags;

	/* use amdgpu mes_flush_shader_debugger instead */
	if (op_input.set_shader_debugger.flags.process_ctx_flush)
		return -EINVAL;

	op_input.set_shader_debugger.spi_gdbg_per_vmid_cntl = spi_gdbg_per_vmid_cntl;
	memcpy(op_input.set_shader_debugger.tcp_watch_cntl, tcp_watch_cntl,
	       sizeof(op_input.set_shader_debugger.tcp_watch_cntl));

	/* trap_en is only passed on from scheduler API version 14 onwards */
	if (((adev->mes.sched_version & AMDGPU_MES_API_VERSION_MASK) >>
	     AMDGPU_MES_API_VERSION_SHIFT) >= 14)
		op_input.set_shader_debugger.trap_en = trap_en;

	amdgpu_mes_lock(&adev->mes);
	ret = adev->mes.funcs->misc_op(&adev->mes, &op_input);
	if (ret)
		DRM_ERROR("failed to set_shader_debugger\n");
	amdgpu_mes_unlock(&adev->mes);

	return ret;
}
911a9818854SJonathan Kim
/*
 * amdgpu_mes_flush_shader_debugger - issue a shader debugger process-context flush
 * @adev: amdgpu device
 * @process_context_addr: process context address the flush applies to
 *
 * Sends a MES_MISC_OP_SET_SHADER_DEBUGGER request whose only payload is
 * the process context address and the process_ctx_flush flag.
 *
 * Returns the result of the backend's misc_op callback, or -EINVAL when
 * the backend does not provide one.
 */
int amdgpu_mes_flush_shader_debugger(struct amdgpu_device *adev,
				     uint64_t process_context_addr)
{
	struct mes_misc_op_input op_input = {0};
	int ret;

	if (!adev->mes.funcs->misc_op) {
		DRM_ERROR("mes flush shader debugger is not supported!\n");
		return -EINVAL;
	}

	op_input.op = MES_MISC_OP_SET_SHADER_DEBUGGER;
	op_input.set_shader_debugger.process_context_addr = process_context_addr;
	op_input.set_shader_debugger.flags.process_ctx_flush = true;

	amdgpu_mes_lock(&adev->mes);
	ret = adev->mes.funcs->misc_op(&adev->mes, &op_input);
	if (ret)
		DRM_ERROR("failed to set_shader_debugger\n");
	amdgpu_mes_unlock(&adev->mes);

	return ret;
}
9373a950c56SJonathan Kim
9381a27aacbSJack Xiao static void
amdgpu_mes_ring_to_queue_props(struct amdgpu_device * adev,struct amdgpu_ring * ring,struct amdgpu_mes_queue_properties * props)9391a27aacbSJack Xiao amdgpu_mes_ring_to_queue_props(struct amdgpu_device *adev,
9401a27aacbSJack Xiao struct amdgpu_ring *ring,
9411a27aacbSJack Xiao struct amdgpu_mes_queue_properties *props)
9421a27aacbSJack Xiao {
9431a27aacbSJack Xiao props->queue_type = ring->funcs->type;
9441a27aacbSJack Xiao props->hqd_base_gpu_addr = ring->gpu_addr;
9451a27aacbSJack Xiao props->rptr_gpu_addr = ring->rptr_gpu_addr;
9461a27aacbSJack Xiao props->wptr_gpu_addr = ring->wptr_gpu_addr;
947fe4e9ff9SJack Xiao props->wptr_mc_addr =
948fe4e9ff9SJack Xiao ring->mes_ctx->meta_data_mc_addr + ring->wptr_offs;
9491a27aacbSJack Xiao props->queue_size = ring->ring_size;
9501a27aacbSJack Xiao props->eop_gpu_addr = ring->eop_gpu_addr;
9511a27aacbSJack Xiao props->hqd_pipe_priority = AMDGPU_GFX_PIPE_PRIO_NORMAL;
9521a27aacbSJack Xiao props->hqd_queue_priority = AMDGPU_GFX_QUEUE_PRIORITY_MINIMUM;
9531a27aacbSJack Xiao props->paging = false;
9541a27aacbSJack Xiao props->ring = ring;
9551a27aacbSJack Xiao }
95611ec5b36SJack Xiao
95711ec5b36SJack Xiao #define DEFINE_AMDGPU_MES_CTX_GET_OFFS_ENG(_eng) \
95811ec5b36SJack Xiao do { \
95911ec5b36SJack Xiao if (id_offs < AMDGPU_MES_CTX_MAX_OFFS) \
96011ec5b36SJack Xiao return offsetof(struct amdgpu_mes_ctx_meta_data, \
96111ec5b36SJack Xiao _eng[ring->idx].slots[id_offs]); \
96211ec5b36SJack Xiao else if (id_offs == AMDGPU_MES_CTX_RING_OFFS) \
96311ec5b36SJack Xiao return offsetof(struct amdgpu_mes_ctx_meta_data, \
96411ec5b36SJack Xiao _eng[ring->idx].ring); \
96511ec5b36SJack Xiao else if (id_offs == AMDGPU_MES_CTX_IB_OFFS) \
96611ec5b36SJack Xiao return offsetof(struct amdgpu_mes_ctx_meta_data, \
96711ec5b36SJack Xiao _eng[ring->idx].ib); \
96811ec5b36SJack Xiao else if (id_offs == AMDGPU_MES_CTX_PADDING_OFFS) \
96911ec5b36SJack Xiao return offsetof(struct amdgpu_mes_ctx_meta_data, \
97011ec5b36SJack Xiao _eng[ring->idx].padding); \
97111ec5b36SJack Xiao } while(0)
97211ec5b36SJack Xiao
amdgpu_mes_ctx_get_offs(struct amdgpu_ring * ring,unsigned int id_offs)97311ec5b36SJack Xiao int amdgpu_mes_ctx_get_offs(struct amdgpu_ring *ring, unsigned int id_offs)
97411ec5b36SJack Xiao {
97511ec5b36SJack Xiao switch (ring->funcs->type) {
97611ec5b36SJack Xiao case AMDGPU_RING_TYPE_GFX:
97711ec5b36SJack Xiao DEFINE_AMDGPU_MES_CTX_GET_OFFS_ENG(gfx);
97811ec5b36SJack Xiao break;
97911ec5b36SJack Xiao case AMDGPU_RING_TYPE_COMPUTE:
98011ec5b36SJack Xiao DEFINE_AMDGPU_MES_CTX_GET_OFFS_ENG(compute);
98111ec5b36SJack Xiao break;
98211ec5b36SJack Xiao case AMDGPU_RING_TYPE_SDMA:
98311ec5b36SJack Xiao DEFINE_AMDGPU_MES_CTX_GET_OFFS_ENG(sdma);
98411ec5b36SJack Xiao break;
98511ec5b36SJack Xiao default:
98611ec5b36SJack Xiao break;
98711ec5b36SJack Xiao }
98811ec5b36SJack Xiao
98911ec5b36SJack Xiao WARN_ON(1);
99011ec5b36SJack Xiao return -EINVAL;
99111ec5b36SJack Xiao }
992d0c423b6SJack Xiao
amdgpu_mes_add_ring(struct amdgpu_device * adev,int gang_id,int queue_type,int idx,struct amdgpu_mes_ctx_data * ctx_data,struct amdgpu_ring ** out)993d0c423b6SJack Xiao int amdgpu_mes_add_ring(struct amdgpu_device *adev, int gang_id,
994d0c423b6SJack Xiao int queue_type, int idx,
995d0c423b6SJack Xiao struct amdgpu_mes_ctx_data *ctx_data,
996d0c423b6SJack Xiao struct amdgpu_ring **out)
997d0c423b6SJack Xiao {
998d0c423b6SJack Xiao struct amdgpu_ring *ring;
999d0c423b6SJack Xiao struct amdgpu_mes_gang *gang;
1000d0c423b6SJack Xiao struct amdgpu_mes_queue_properties qprops = {0};
1001d0c423b6SJack Xiao int r, queue_id, pasid;
1002d0c423b6SJack Xiao
100318ee4ce6SJack Xiao /*
100418ee4ce6SJack Xiao * Avoid taking any other locks under MES lock to avoid circular
100518ee4ce6SJack Xiao * lock dependencies.
100618ee4ce6SJack Xiao */
100718ee4ce6SJack Xiao amdgpu_mes_lock(&adev->mes);
1008d0c423b6SJack Xiao gang = idr_find(&adev->mes.gang_id_idr, gang_id);
1009d0c423b6SJack Xiao if (!gang) {
1010d0c423b6SJack Xiao DRM_ERROR("gang id %d doesn't exist\n", gang_id);
101118ee4ce6SJack Xiao amdgpu_mes_unlock(&adev->mes);
1012d0c423b6SJack Xiao return -EINVAL;
1013d0c423b6SJack Xiao }
1014d0c423b6SJack Xiao pasid = gang->process->pasid;
1015d0c423b6SJack Xiao
1016d0c423b6SJack Xiao ring = kzalloc(sizeof(struct amdgpu_ring), GFP_KERNEL);
1017d0c423b6SJack Xiao if (!ring) {
101818ee4ce6SJack Xiao amdgpu_mes_unlock(&adev->mes);
1019d0c423b6SJack Xiao return -ENOMEM;
1020d0c423b6SJack Xiao }
1021d0c423b6SJack Xiao
1022d0c423b6SJack Xiao ring->ring_obj = NULL;
1023d0c423b6SJack Xiao ring->use_doorbell = true;
1024d0c423b6SJack Xiao ring->is_mes_queue = true;
1025d0c423b6SJack Xiao ring->mes_ctx = ctx_data;
1026d0c423b6SJack Xiao ring->idx = idx;
1027d0c423b6SJack Xiao ring->no_scheduler = true;
1028d0c423b6SJack Xiao
1029d0c423b6SJack Xiao if (queue_type == AMDGPU_RING_TYPE_COMPUTE) {
1030d0c423b6SJack Xiao int offset = offsetof(struct amdgpu_mes_ctx_meta_data,
1031d0c423b6SJack Xiao compute[ring->idx].mec_hpd);
1032d0c423b6SJack Xiao ring->eop_gpu_addr =
1033d0c423b6SJack Xiao amdgpu_mes_ctx_get_offs_gpu_addr(ring, offset);
1034d0c423b6SJack Xiao }
1035d0c423b6SJack Xiao
1036d0c423b6SJack Xiao switch (queue_type) {
1037d0c423b6SJack Xiao case AMDGPU_RING_TYPE_GFX:
1038d0c423b6SJack Xiao ring->funcs = adev->gfx.gfx_ring[0].funcs;
1039553d2683STim Huang ring->me = adev->gfx.gfx_ring[0].me;
1040553d2683STim Huang ring->pipe = adev->gfx.gfx_ring[0].pipe;
1041d0c423b6SJack Xiao break;
1042d0c423b6SJack Xiao case AMDGPU_RING_TYPE_COMPUTE:
1043d0c423b6SJack Xiao ring->funcs = adev->gfx.compute_ring[0].funcs;
1044553d2683STim Huang ring->me = adev->gfx.compute_ring[0].me;
1045553d2683STim Huang ring->pipe = adev->gfx.compute_ring[0].pipe;
1046d0c423b6SJack Xiao break;
1047d0c423b6SJack Xiao case AMDGPU_RING_TYPE_SDMA:
1048d0c423b6SJack Xiao ring->funcs = adev->sdma.instance[0].ring.funcs;
1049d0c423b6SJack Xiao break;
1050d0c423b6SJack Xiao default:
1051d0c423b6SJack Xiao BUG();
1052d0c423b6SJack Xiao }
1053d0c423b6SJack Xiao
1054d0c423b6SJack Xiao r = amdgpu_ring_init(adev, ring, 1024, NULL, 0,
1055d0c423b6SJack Xiao AMDGPU_RING_PRIO_DEFAULT, NULL);
1056*65ca9f8dSSrinivasan Shanmugam if (r) {
1057*65ca9f8dSSrinivasan Shanmugam amdgpu_mes_unlock(&adev->mes);
1058d0c423b6SJack Xiao goto clean_up_memory;
1059*65ca9f8dSSrinivasan Shanmugam }
1060d0c423b6SJack Xiao
1061d0c423b6SJack Xiao amdgpu_mes_ring_to_queue_props(adev, ring, &qprops);
1062d0c423b6SJack Xiao
1063d0c423b6SJack Xiao dma_fence_wait(gang->process->vm->last_update, false);
1064d0c423b6SJack Xiao dma_fence_wait(ctx_data->meta_data_va->last_pt_update, false);
106518ee4ce6SJack Xiao amdgpu_mes_unlock(&adev->mes);
1066d0c423b6SJack Xiao
1067d0c423b6SJack Xiao r = amdgpu_mes_add_hw_queue(adev, gang_id, &qprops, &queue_id);
1068d0c423b6SJack Xiao if (r)
1069d0c423b6SJack Xiao goto clean_up_ring;
1070d0c423b6SJack Xiao
1071d0c423b6SJack Xiao ring->hw_queue_id = queue_id;
1072d0c423b6SJack Xiao ring->doorbell_index = qprops.doorbell_off;
1073d0c423b6SJack Xiao
1074d0c423b6SJack Xiao if (queue_type == AMDGPU_RING_TYPE_GFX)
1075d0c423b6SJack Xiao sprintf(ring->name, "gfx_%d.%d.%d", pasid, gang_id, queue_id);
1076d0c423b6SJack Xiao else if (queue_type == AMDGPU_RING_TYPE_COMPUTE)
1077d0c423b6SJack Xiao sprintf(ring->name, "compute_%d.%d.%d", pasid, gang_id,
1078d0c423b6SJack Xiao queue_id);
1079d0c423b6SJack Xiao else if (queue_type == AMDGPU_RING_TYPE_SDMA)
1080d0c423b6SJack Xiao sprintf(ring->name, "sdma_%d.%d.%d", pasid, gang_id,
1081d0c423b6SJack Xiao queue_id);
1082d0c423b6SJack Xiao else
1083d0c423b6SJack Xiao BUG();
1084d0c423b6SJack Xiao
1085d0c423b6SJack Xiao *out = ring;
1086d0c423b6SJack Xiao return 0;
1087d0c423b6SJack Xiao
1088d0c423b6SJack Xiao clean_up_ring:
1089d0c423b6SJack Xiao amdgpu_ring_fini(ring);
1090d0c423b6SJack Xiao clean_up_memory:
1091d0c423b6SJack Xiao kfree(ring);
1092d0c423b6SJack Xiao return r;
1093d0c423b6SJack Xiao }
10949cc654c8SJack Xiao
amdgpu_mes_remove_ring(struct amdgpu_device * adev,struct amdgpu_ring * ring)10959cc654c8SJack Xiao void amdgpu_mes_remove_ring(struct amdgpu_device *adev,
10969cc654c8SJack Xiao struct amdgpu_ring *ring)
10979cc654c8SJack Xiao {
10989cc654c8SJack Xiao if (!ring)
10999cc654c8SJack Xiao return;
11009cc654c8SJack Xiao
11019cc654c8SJack Xiao amdgpu_mes_remove_hw_queue(adev, ring->hw_queue_id);
110239cfce75SJack Xiao del_timer_sync(&ring->fence_drv.fallback_timer);
11039cc654c8SJack Xiao amdgpu_ring_fini(ring);
11049cc654c8SJack Xiao kfree(ring);
11059cc654c8SJack Xiao }
1106e3652b09SJack Xiao
amdgpu_mes_get_aggregated_doorbell_index(struct amdgpu_device * adev,enum amdgpu_mes_priority_level prio)11072d7a1f71SLe Ma uint32_t amdgpu_mes_get_aggregated_doorbell_index(struct amdgpu_device *adev,
11082d7a1f71SLe Ma enum amdgpu_mes_priority_level prio)
11092d7a1f71SLe Ma {
11102d7a1f71SLe Ma return adev->mes.aggregated_doorbells[prio];
11112d7a1f71SLe Ma }
11122d7a1f71SLe Ma
amdgpu_mes_ctx_alloc_meta_data(struct amdgpu_device * adev,struct amdgpu_mes_ctx_data * ctx_data)1113e3652b09SJack Xiao int amdgpu_mes_ctx_alloc_meta_data(struct amdgpu_device *adev,
1114e3652b09SJack Xiao struct amdgpu_mes_ctx_data *ctx_data)
1115e3652b09SJack Xiao {
1116e3652b09SJack Xiao int r;
1117e3652b09SJack Xiao
1118e3652b09SJack Xiao r = amdgpu_bo_create_kernel(adev,
1119e3652b09SJack Xiao sizeof(struct amdgpu_mes_ctx_meta_data),
1120e3652b09SJack Xiao PAGE_SIZE, AMDGPU_GEM_DOMAIN_GTT,
1121fe4e9ff9SJack Xiao &ctx_data->meta_data_obj,
1122fe4e9ff9SJack Xiao &ctx_data->meta_data_mc_addr,
1123e3652b09SJack Xiao &ctx_data->meta_data_ptr);
11240b9ff428SLee Jones if (r) {
11250b9ff428SLee Jones dev_warn(adev->dev, "(%d) create CTX bo failed\n", r);
11260b9ff428SLee Jones return r;
11270b9ff428SLee Jones }
11280b9ff428SLee Jones
1129e3652b09SJack Xiao if (!ctx_data->meta_data_obj)
1130e3652b09SJack Xiao return -ENOMEM;
1131e3652b09SJack Xiao
1132e3652b09SJack Xiao memset(ctx_data->meta_data_ptr, 0,
1133e3652b09SJack Xiao sizeof(struct amdgpu_mes_ctx_meta_data));
1134e3652b09SJack Xiao
1135e3652b09SJack Xiao return 0;
1136e3652b09SJack Xiao }
1137e3652b09SJack Xiao
amdgpu_mes_ctx_free_meta_data(struct amdgpu_mes_ctx_data * ctx_data)1138e3652b09SJack Xiao void amdgpu_mes_ctx_free_meta_data(struct amdgpu_mes_ctx_data *ctx_data)
1139e3652b09SJack Xiao {
1140e3652b09SJack Xiao if (ctx_data->meta_data_obj)
1141fe4e9ff9SJack Xiao amdgpu_bo_free_kernel(&ctx_data->meta_data_obj,
1142fe4e9ff9SJack Xiao &ctx_data->meta_data_mc_addr,
1143fe4e9ff9SJack Xiao &ctx_data->meta_data_ptr);
1144e3652b09SJack Xiao }
1145a22f760aSJack Xiao
amdgpu_mes_ctx_map_meta_data(struct amdgpu_device * adev,struct amdgpu_vm * vm,struct amdgpu_mes_ctx_data * ctx_data)11467c18b40eSJack Xiao int amdgpu_mes_ctx_map_meta_data(struct amdgpu_device *adev,
1147a22f760aSJack Xiao struct amdgpu_vm *vm,
1148a22f760aSJack Xiao struct amdgpu_mes_ctx_data *ctx_data)
1149a22f760aSJack Xiao {
11507c18b40eSJack Xiao struct amdgpu_bo_va *bo_va;
11517c18b40eSJack Xiao struct amdgpu_sync sync;
11522acc73f8SChristian König struct drm_exec exec;
1153a22f760aSJack Xiao int r;
1154a22f760aSJack Xiao
11557c18b40eSJack Xiao amdgpu_sync_create(&sync);
1156a22f760aSJack Xiao
11572acc73f8SChristian König drm_exec_init(&exec, 0);
11582acc73f8SChristian König drm_exec_until_all_locked(&exec) {
11592acc73f8SChristian König r = drm_exec_lock_obj(&exec,
11602acc73f8SChristian König &ctx_data->meta_data_obj->tbo.base);
11612acc73f8SChristian König drm_exec_retry_on_contention(&exec);
11622acc73f8SChristian König if (unlikely(r))
11632acc73f8SChristian König goto error_fini_exec;
11647c18b40eSJack Xiao
11652acc73f8SChristian König r = amdgpu_vm_lock_pd(vm, &exec, 0);
11662acc73f8SChristian König drm_exec_retry_on_contention(&exec);
11672acc73f8SChristian König if (unlikely(r))
11682acc73f8SChristian König goto error_fini_exec;
11697c18b40eSJack Xiao }
11707c18b40eSJack Xiao
11717c18b40eSJack Xiao bo_va = amdgpu_vm_bo_add(adev, vm, ctx_data->meta_data_obj);
11727c18b40eSJack Xiao if (!bo_va) {
11737c18b40eSJack Xiao DRM_ERROR("failed to create bo_va for meta data BO\n");
11742acc73f8SChristian König r = -ENOMEM;
11752acc73f8SChristian König goto error_fini_exec;
11767c18b40eSJack Xiao }
11777c18b40eSJack Xiao
11787c18b40eSJack Xiao r = amdgpu_vm_bo_map(adev, bo_va, ctx_data->meta_data_gpu_addr, 0,
11797c18b40eSJack Xiao sizeof(struct amdgpu_mes_ctx_meta_data),
11807c18b40eSJack Xiao AMDGPU_PTE_READABLE | AMDGPU_PTE_WRITEABLE |
11817c18b40eSJack Xiao AMDGPU_PTE_EXECUTABLE);
11827c18b40eSJack Xiao
11837c18b40eSJack Xiao if (r) {
11847c18b40eSJack Xiao DRM_ERROR("failed to do bo_map on meta data, err=%d\n", r);
11852acc73f8SChristian König goto error_del_bo_va;
11867c18b40eSJack Xiao }
11877c18b40eSJack Xiao
11887c18b40eSJack Xiao r = amdgpu_vm_bo_update(adev, bo_va, false);
11897c18b40eSJack Xiao if (r) {
11907c18b40eSJack Xiao DRM_ERROR("failed to do vm_bo_update on meta data\n");
11912acc73f8SChristian König goto error_del_bo_va;
11927c18b40eSJack Xiao }
11937c18b40eSJack Xiao amdgpu_sync_fence(&sync, bo_va->last_pt_update);
1194a22f760aSJack Xiao
1195a22f760aSJack Xiao r = amdgpu_vm_update_pdes(adev, vm, false);
11967c18b40eSJack Xiao if (r) {
11977c18b40eSJack Xiao DRM_ERROR("failed to update pdes on meta data\n");
11982acc73f8SChristian König goto error_del_bo_va;
11997c18b40eSJack Xiao }
12007c18b40eSJack Xiao amdgpu_sync_fence(&sync, vm->last_update);
1201a22f760aSJack Xiao
12027c18b40eSJack Xiao amdgpu_sync_wait(&sync, false);
12032acc73f8SChristian König drm_exec_fini(&exec);
1204a22f760aSJack Xiao
12057c18b40eSJack Xiao amdgpu_sync_free(&sync);
12067c18b40eSJack Xiao ctx_data->meta_data_va = bo_va;
1207a22f760aSJack Xiao return 0;
1208a22f760aSJack Xiao
12092acc73f8SChristian König error_del_bo_va:
12107c18b40eSJack Xiao amdgpu_vm_bo_del(adev, bo_va);
12112acc73f8SChristian König
12122acc73f8SChristian König error_fini_exec:
12132acc73f8SChristian König drm_exec_fini(&exec);
12147c18b40eSJack Xiao amdgpu_sync_free(&sync);
1215a22f760aSJack Xiao return r;
1216a22f760aSJack Xiao }
1217f1d93c9cSJack Xiao
amdgpu_mes_ctx_unmap_meta_data(struct amdgpu_device * adev,struct amdgpu_mes_ctx_data * ctx_data)1218737dad0bSJack Xiao int amdgpu_mes_ctx_unmap_meta_data(struct amdgpu_device *adev,
1219737dad0bSJack Xiao struct amdgpu_mes_ctx_data *ctx_data)
1220737dad0bSJack Xiao {
1221737dad0bSJack Xiao struct amdgpu_bo_va *bo_va = ctx_data->meta_data_va;
1222737dad0bSJack Xiao struct amdgpu_bo *bo = ctx_data->meta_data_obj;
1223737dad0bSJack Xiao struct amdgpu_vm *vm = bo_va->base.vm;
12242acc73f8SChristian König struct dma_fence *fence;
12252acc73f8SChristian König struct drm_exec exec;
12262acc73f8SChristian König long r;
1227737dad0bSJack Xiao
12282acc73f8SChristian König drm_exec_init(&exec, 0);
12292acc73f8SChristian König drm_exec_until_all_locked(&exec) {
12302acc73f8SChristian König r = drm_exec_lock_obj(&exec,
12312acc73f8SChristian König &ctx_data->meta_data_obj->tbo.base);
12322acc73f8SChristian König drm_exec_retry_on_contention(&exec);
12332acc73f8SChristian König if (unlikely(r))
12342acc73f8SChristian König goto out_unlock;
1235737dad0bSJack Xiao
12362acc73f8SChristian König r = amdgpu_vm_lock_pd(vm, &exec, 0);
12372acc73f8SChristian König drm_exec_retry_on_contention(&exec);
12382acc73f8SChristian König if (unlikely(r))
12392acc73f8SChristian König goto out_unlock;
1240737dad0bSJack Xiao }
1241737dad0bSJack Xiao
1242737dad0bSJack Xiao amdgpu_vm_bo_del(adev, bo_va);
1243737dad0bSJack Xiao if (!amdgpu_vm_ready(vm))
1244737dad0bSJack Xiao goto out_unlock;
1245737dad0bSJack Xiao
12462acc73f8SChristian König r = dma_resv_get_singleton(bo->tbo.base.resv, DMA_RESV_USAGE_BOOKKEEP,
12472acc73f8SChristian König &fence);
1248737dad0bSJack Xiao if (r)
1249737dad0bSJack Xiao goto out_unlock;
1250737dad0bSJack Xiao if (fence) {
1251737dad0bSJack Xiao amdgpu_bo_fence(bo, fence, true);
1252737dad0bSJack Xiao fence = NULL;
1253737dad0bSJack Xiao }
1254737dad0bSJack Xiao
1255737dad0bSJack Xiao r = amdgpu_vm_clear_freed(adev, vm, &fence);
1256737dad0bSJack Xiao if (r || !fence)
1257737dad0bSJack Xiao goto out_unlock;
1258737dad0bSJack Xiao
1259737dad0bSJack Xiao dma_fence_wait(fence, false);
1260737dad0bSJack Xiao amdgpu_bo_fence(bo, fence, true);
1261737dad0bSJack Xiao dma_fence_put(fence);
1262737dad0bSJack Xiao
1263737dad0bSJack Xiao out_unlock:
1264737dad0bSJack Xiao if (unlikely(r < 0))
1265737dad0bSJack Xiao dev_err(adev->dev, "failed to clear page tables (%ld)\n", r);
12662acc73f8SChristian König drm_exec_fini(&exec);
1267737dad0bSJack Xiao
1268737dad0bSJack Xiao return r;
1269737dad0bSJack Xiao }
1270737dad0bSJack Xiao
amdgpu_mes_test_create_gang_and_queues(struct amdgpu_device * adev,int pasid,int * gang_id,int queue_type,int num_queue,struct amdgpu_ring ** added_rings,struct amdgpu_mes_ctx_data * ctx_data)1271f1d93c9cSJack Xiao static int amdgpu_mes_test_create_gang_and_queues(struct amdgpu_device *adev,
1272f1d93c9cSJack Xiao int pasid, int *gang_id,
1273f1d93c9cSJack Xiao int queue_type, int num_queue,
1274f1d93c9cSJack Xiao struct amdgpu_ring **added_rings,
1275f1d93c9cSJack Xiao struct amdgpu_mes_ctx_data *ctx_data)
1276f1d93c9cSJack Xiao {
1277f1d93c9cSJack Xiao struct amdgpu_ring *ring;
1278f1d93c9cSJack Xiao struct amdgpu_mes_gang_properties gprops = {0};
1279f1d93c9cSJack Xiao int r, j;
1280f1d93c9cSJack Xiao
1281f1d93c9cSJack Xiao /* create a gang for the process */
1282f1d93c9cSJack Xiao gprops.priority = AMDGPU_MES_PRIORITY_LEVEL_NORMAL;
1283f1d93c9cSJack Xiao gprops.gang_quantum = adev->mes.default_gang_quantum;
1284f1d93c9cSJack Xiao gprops.inprocess_gang_priority = AMDGPU_MES_PRIORITY_LEVEL_NORMAL;
1285f1d93c9cSJack Xiao gprops.priority_level = AMDGPU_MES_PRIORITY_LEVEL_NORMAL;
1286f1d93c9cSJack Xiao gprops.global_priority_level = AMDGPU_MES_PRIORITY_LEVEL_NORMAL;
1287f1d93c9cSJack Xiao
1288f1d93c9cSJack Xiao r = amdgpu_mes_add_gang(adev, pasid, &gprops, gang_id);
1289f1d93c9cSJack Xiao if (r) {
1290f1d93c9cSJack Xiao DRM_ERROR("failed to add gang\n");
1291f1d93c9cSJack Xiao return r;
1292f1d93c9cSJack Xiao }
1293f1d93c9cSJack Xiao
1294f1d93c9cSJack Xiao /* create queues for the gang */
1295f1d93c9cSJack Xiao for (j = 0; j < num_queue; j++) {
1296f1d93c9cSJack Xiao r = amdgpu_mes_add_ring(adev, *gang_id, queue_type, j,
1297f1d93c9cSJack Xiao ctx_data, &ring);
1298f1d93c9cSJack Xiao if (r) {
1299f1d93c9cSJack Xiao DRM_ERROR("failed to add ring\n");
1300f1d93c9cSJack Xiao break;
1301f1d93c9cSJack Xiao }
1302f1d93c9cSJack Xiao
1303f1d93c9cSJack Xiao DRM_INFO("ring %s was added\n", ring->name);
1304f1d93c9cSJack Xiao added_rings[j] = ring;
1305f1d93c9cSJack Xiao }
1306f1d93c9cSJack Xiao
1307f1d93c9cSJack Xiao return 0;
1308f1d93c9cSJack Xiao }
1309cdb7476dSJack Xiao
amdgpu_mes_test_queues(struct amdgpu_ring ** added_rings)1310cdb7476dSJack Xiao static int amdgpu_mes_test_queues(struct amdgpu_ring **added_rings)
1311cdb7476dSJack Xiao {
1312cdb7476dSJack Xiao struct amdgpu_ring *ring;
1313cdb7476dSJack Xiao int i, r;
1314cdb7476dSJack Xiao
1315cdb7476dSJack Xiao for (i = 0; i < AMDGPU_MES_CTX_MAX_RINGS; i++) {
1316cdb7476dSJack Xiao ring = added_rings[i];
1317cdb7476dSJack Xiao if (!ring)
1318cdb7476dSJack Xiao continue;
1319cdb7476dSJack Xiao
132093ab59acSGuchun Chen r = amdgpu_ring_test_helper(ring);
132193ab59acSGuchun Chen if (r)
1322cdb7476dSJack Xiao return r;
1323cdb7476dSJack Xiao
1324cdb7476dSJack Xiao r = amdgpu_ring_test_ib(ring, 1000 * 10);
1325cdb7476dSJack Xiao if (r) {
1326cdb7476dSJack Xiao DRM_DEV_ERROR(ring->adev->dev,
1327cdb7476dSJack Xiao "ring %s ib test failed (%d)\n",
1328cdb7476dSJack Xiao ring->name, r);
1329cdb7476dSJack Xiao return r;
1330cdb7476dSJack Xiao } else
1331cdb7476dSJack Xiao DRM_INFO("ring %s ib test pass\n", ring->name);
1332cdb7476dSJack Xiao }
1333cdb7476dSJack Xiao
1334cdb7476dSJack Xiao return 0;
1335cdb7476dSJack Xiao }
13366624d161SJack Xiao
amdgpu_mes_self_test(struct amdgpu_device * adev)13376624d161SJack Xiao int amdgpu_mes_self_test(struct amdgpu_device *adev)
13386624d161SJack Xiao {
13396624d161SJack Xiao struct amdgpu_vm *vm = NULL;
13406624d161SJack Xiao struct amdgpu_mes_ctx_data ctx_data = {0};
13416624d161SJack Xiao struct amdgpu_ring *added_rings[AMDGPU_MES_CTX_MAX_RINGS] = { NULL };
13426624d161SJack Xiao int gang_ids[3] = {0};
13435ee33d90SJack Xiao int queue_types[][2] = { { AMDGPU_RING_TYPE_GFX, 1 },
13445ee33d90SJack Xiao { AMDGPU_RING_TYPE_COMPUTE, 1 },
13455ee33d90SJack Xiao { AMDGPU_RING_TYPE_SDMA, 1} };
13466624d161SJack Xiao int i, r, pasid, k = 0;
13476624d161SJack Xiao
13486624d161SJack Xiao pasid = amdgpu_pasid_alloc(16);
13496624d161SJack Xiao if (pasid < 0) {
13506624d161SJack Xiao dev_warn(adev->dev, "No more PASIDs available!");
13516624d161SJack Xiao pasid = 0;
13526624d161SJack Xiao }
13536624d161SJack Xiao
13546624d161SJack Xiao vm = kzalloc(sizeof(*vm), GFP_KERNEL);
13556624d161SJack Xiao if (!vm) {
13566624d161SJack Xiao r = -ENOMEM;
13576624d161SJack Xiao goto error_pasid;
13586624d161SJack Xiao }
13596624d161SJack Xiao
13605003ca63SGuchun Chen r = amdgpu_vm_init(adev, vm, -1);
13616624d161SJack Xiao if (r) {
13626624d161SJack Xiao DRM_ERROR("failed to initialize vm\n");
13636624d161SJack Xiao goto error_pasid;
13646624d161SJack Xiao }
13656624d161SJack Xiao
13666624d161SJack Xiao r = amdgpu_mes_ctx_alloc_meta_data(adev, &ctx_data);
13676624d161SJack Xiao if (r) {
13686624d161SJack Xiao DRM_ERROR("failed to alloc ctx meta data\n");
1369c3c48339SJianglei Nie goto error_fini;
13706624d161SJack Xiao }
13716624d161SJack Xiao
13727c18b40eSJack Xiao ctx_data.meta_data_gpu_addr = AMDGPU_VA_RESERVED_SIZE;
13737c18b40eSJack Xiao r = amdgpu_mes_ctx_map_meta_data(adev, vm, &ctx_data);
13746624d161SJack Xiao if (r) {
13756624d161SJack Xiao DRM_ERROR("failed to map ctx meta data\n");
13766624d161SJack Xiao goto error_vm;
13776624d161SJack Xiao }
13786624d161SJack Xiao
13796624d161SJack Xiao r = amdgpu_mes_create_process(adev, pasid, vm);
13806624d161SJack Xiao if (r) {
13816624d161SJack Xiao DRM_ERROR("failed to create MES process\n");
13826624d161SJack Xiao goto error_vm;
13836624d161SJack Xiao }
13846624d161SJack Xiao
13856624d161SJack Xiao for (i = 0; i < ARRAY_SIZE(queue_types); i++) {
138618ee4ce6SJack Xiao /* On GFX v10.3, fw hasn't supported to map sdma queue. */
138718ee4ce6SJack Xiao if (adev->ip_versions[GC_HWIP][0] >= IP_VERSION(10, 3, 0) &&
138818ee4ce6SJack Xiao adev->ip_versions[GC_HWIP][0] < IP_VERSION(11, 0, 0) &&
138918ee4ce6SJack Xiao queue_types[i][0] == AMDGPU_RING_TYPE_SDMA)
1390da1c0338SJack Xiao continue;
1391da1c0338SJack Xiao
13926624d161SJack Xiao r = amdgpu_mes_test_create_gang_and_queues(adev, pasid,
13936624d161SJack Xiao &gang_ids[i],
13946624d161SJack Xiao queue_types[i][0],
13956624d161SJack Xiao queue_types[i][1],
13966624d161SJack Xiao &added_rings[k],
13976624d161SJack Xiao &ctx_data);
13986624d161SJack Xiao if (r)
13996624d161SJack Xiao goto error_queues;
14006624d161SJack Xiao
14016624d161SJack Xiao k += queue_types[i][1];
14026624d161SJack Xiao }
14036624d161SJack Xiao
14046624d161SJack Xiao /* start ring test and ib test for MES queues */
14056624d161SJack Xiao amdgpu_mes_test_queues(added_rings);
14066624d161SJack Xiao
14076624d161SJack Xiao error_queues:
14086624d161SJack Xiao /* remove all queues */
14096624d161SJack Xiao for (i = 0; i < ARRAY_SIZE(added_rings); i++) {
14106624d161SJack Xiao if (!added_rings[i])
14116624d161SJack Xiao continue;
14126624d161SJack Xiao amdgpu_mes_remove_ring(adev, added_rings[i]);
14136624d161SJack Xiao }
14146624d161SJack Xiao
14156624d161SJack Xiao for (i = 0; i < ARRAY_SIZE(gang_ids); i++) {
14166624d161SJack Xiao if (!gang_ids[i])
14176624d161SJack Xiao continue;
14186624d161SJack Xiao amdgpu_mes_remove_gang(adev, gang_ids[i]);
14196624d161SJack Xiao }
14206624d161SJack Xiao
14216624d161SJack Xiao amdgpu_mes_destroy_process(adev, pasid);
14226624d161SJack Xiao
14236624d161SJack Xiao error_vm:
1424737dad0bSJack Xiao amdgpu_mes_ctx_unmap_meta_data(adev, &ctx_data);
1425c3c48339SJianglei Nie
1426c3c48339SJianglei Nie error_fini:
14276624d161SJack Xiao amdgpu_vm_fini(adev, vm);
14286624d161SJack Xiao
14296624d161SJack Xiao error_pasid:
14306624d161SJack Xiao if (pasid)
14316624d161SJack Xiao amdgpu_pasid_free(pasid);
14326624d161SJack Xiao
14336624d161SJack Xiao amdgpu_mes_ctx_free_meta_data(&ctx_data);
14346624d161SJack Xiao kfree(vm);
14356624d161SJack Xiao return 0;
14366624d161SJack Xiao }
1437cc42e76eSMario Limonciello
amdgpu_mes_init_microcode(struct amdgpu_device * adev,int pipe)1438cc42e76eSMario Limonciello int amdgpu_mes_init_microcode(struct amdgpu_device *adev, int pipe)
1439cc42e76eSMario Limonciello {
1440cc42e76eSMario Limonciello const struct mes_firmware_header_v1_0 *mes_hdr;
1441cc42e76eSMario Limonciello struct amdgpu_firmware_info *info;
1442cc42e76eSMario Limonciello char ucode_prefix[30];
1443cc42e76eSMario Limonciello char fw_name[40];
144497998b89SJack Xiao bool need_retry = false;
1445cc42e76eSMario Limonciello int r;
1446cc42e76eSMario Limonciello
144797998b89SJack Xiao amdgpu_ucode_ip_version_decode(adev, GC_HWIP, ucode_prefix,
144897998b89SJack Xiao sizeof(ucode_prefix));
144997998b89SJack Xiao if (adev->ip_versions[GC_HWIP][0] >= IP_VERSION(11, 0, 0)) {
145097998b89SJack Xiao snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mes%s.bin",
145197998b89SJack Xiao ucode_prefix,
145297998b89SJack Xiao pipe == AMDGPU_MES_SCHED_PIPE ? "_2" : "1");
145397998b89SJack Xiao need_retry = true;
145497998b89SJack Xiao } else {
1455cc42e76eSMario Limonciello snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mes%s.bin",
1456cc42e76eSMario Limonciello ucode_prefix,
1457cc42e76eSMario Limonciello pipe == AMDGPU_MES_SCHED_PIPE ? "" : "1");
145897998b89SJack Xiao }
145997998b89SJack Xiao
146011e0b006SMario Limonciello r = amdgpu_ucode_request(adev, &adev->mes.fw[pipe], fw_name);
146197998b89SJack Xiao if (r && need_retry && pipe == AMDGPU_MES_SCHED_PIPE) {
146297998b89SJack Xiao snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mes.bin",
146397998b89SJack Xiao ucode_prefix);
146497998b89SJack Xiao DRM_INFO("try to fall back to %s\n", fw_name);
146597998b89SJack Xiao r = amdgpu_ucode_request(adev, &adev->mes.fw[pipe],
146697998b89SJack Xiao fw_name);
146797998b89SJack Xiao }
146897998b89SJack Xiao
1469cc42e76eSMario Limonciello if (r)
1470cc42e76eSMario Limonciello goto out;
1471cc42e76eSMario Limonciello
1472cc42e76eSMario Limonciello mes_hdr = (const struct mes_firmware_header_v1_0 *)
1473cc42e76eSMario Limonciello adev->mes.fw[pipe]->data;
1474cc42e76eSMario Limonciello adev->mes.uc_start_addr[pipe] =
1475cc42e76eSMario Limonciello le32_to_cpu(mes_hdr->mes_uc_start_addr_lo) |
1476cc42e76eSMario Limonciello ((uint64_t)(le32_to_cpu(mes_hdr->mes_uc_start_addr_hi)) << 32);
1477cc42e76eSMario Limonciello adev->mes.data_start_addr[pipe] =
1478cc42e76eSMario Limonciello le32_to_cpu(mes_hdr->mes_data_start_addr_lo) |
1479cc42e76eSMario Limonciello ((uint64_t)(le32_to_cpu(mes_hdr->mes_data_start_addr_hi)) << 32);
1480cc42e76eSMario Limonciello
1481cc42e76eSMario Limonciello if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
1482cc42e76eSMario Limonciello int ucode, ucode_data;
1483cc42e76eSMario Limonciello
1484cc42e76eSMario Limonciello if (pipe == AMDGPU_MES_SCHED_PIPE) {
1485cc42e76eSMario Limonciello ucode = AMDGPU_UCODE_ID_CP_MES;
1486cc42e76eSMario Limonciello ucode_data = AMDGPU_UCODE_ID_CP_MES_DATA;
1487cc42e76eSMario Limonciello } else {
1488cc42e76eSMario Limonciello ucode = AMDGPU_UCODE_ID_CP_MES1;
1489cc42e76eSMario Limonciello ucode_data = AMDGPU_UCODE_ID_CP_MES1_DATA;
1490cc42e76eSMario Limonciello }
1491cc42e76eSMario Limonciello
1492cc42e76eSMario Limonciello info = &adev->firmware.ucode[ucode];
1493cc42e76eSMario Limonciello info->ucode_id = ucode;
1494cc42e76eSMario Limonciello info->fw = adev->mes.fw[pipe];
1495cc42e76eSMario Limonciello adev->firmware.fw_size +=
1496cc42e76eSMario Limonciello ALIGN(le32_to_cpu(mes_hdr->mes_ucode_size_bytes),
1497cc42e76eSMario Limonciello PAGE_SIZE);
1498cc42e76eSMario Limonciello
1499cc42e76eSMario Limonciello info = &adev->firmware.ucode[ucode_data];
1500cc42e76eSMario Limonciello info->ucode_id = ucode_data;
1501cc42e76eSMario Limonciello info->fw = adev->mes.fw[pipe];
1502cc42e76eSMario Limonciello adev->firmware.fw_size +=
1503cc42e76eSMario Limonciello ALIGN(le32_to_cpu(mes_hdr->mes_ucode_data_size_bytes),
1504cc42e76eSMario Limonciello PAGE_SIZE);
1505cc42e76eSMario Limonciello }
1506cc42e76eSMario Limonciello
1507cc42e76eSMario Limonciello return 0;
1508cc42e76eSMario Limonciello out:
150911e0b006SMario Limonciello amdgpu_ucode_release(&adev->mes.fw[pipe]);
1510cc42e76eSMario Limonciello return r;
1511cc42e76eSMario Limonciello }
1512