/*
 * Copyright 2019 Advanced Micro Devices, Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 *
 */

#include <linux/firmware.h>
#include <drm/drm_exec.h>

#include "amdgpu_mes.h"
#include "amdgpu.h"
#include "soc15_common.h"
#include "amdgpu_mes_ctx.h"

#define AMDGPU_MES_MAX_NUM_OF_QUEUES_PER_PROCESS 1024
#define AMDGPU_ONE_DOORBELL_SIZE 8

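/*
 * Size (in bytes) of the per-process doorbell slice managed by MES:
 * one 8-byte doorbell per queue, for up to
 * AMDGPU_MES_MAX_NUM_OF_QUEUES_PER_PROCESS queues, rounded up to a page.
 */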
int amdgpu_mes_doorbell_process_slice(struct amdgpu_device *adev)
{
	return roundup(AMDGPU_ONE_DOORBELL_SIZE *
		       AMDGPU_MES_MAX_NUM_OF_QUEUES_PER_PROCESS,
		       PAGE_SIZE);
}

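/*
 * Allocate a free kernel doorbell from the MES doorbell bitmap and return
 * its absolute dword offset on the doorbell BAR.  SDMA queues start the
 * search at the first SDMA engine doorbell; other ring types start at 0.
 */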
static int amdgpu_mes_kernel_doorbell_get(struct amdgpu_device *adev,
					 struct amdgpu_mes_process *process,
					 int ip_type, uint64_t *doorbell_index)
{
	unsigned int offset, found;
	struct amdgpu_mes *mes = &adev->mes;

	if (ip_type == AMDGPU_RING_TYPE_SDMA)
		offset = adev->doorbell_index.sdma_engine[0];
	else
		offset = 0;

	found = find_next_zero_bit(mes->doorbell_bitmap, mes->num_mes_dbs, offset);
	if (found >= mes->num_mes_dbs) {
		DRM_WARN("No doorbell available\n");
		return -ENOSPC;
	}

	set_bit(found, mes->doorbell_bitmap);

	/* Get the absolute doorbell index on BAR */
	*doorbell_index = mes->db_start_dw_offset + found * 2;
	return 0;
}

static void amdgpu_mes_kernel_doorbell_free(struct amdgpu_device *adev,
					   struct amdgpu_mes_process *process,
					   uint32_t doorbell_index)
{
	unsigned int old, rel_index;
	struct amdgpu_mes *mes = &adev->mes;

	/* Find the relative index of the doorbell in this object */
	rel_index = (doorbell_index - mes->db_start_dw_offset) / 2;
	old = test_and_clear_bit(rel_index, mes->doorbell_bitmap);
	WARN_ON(!old);
}

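/*
 * Set up the MES doorbell bitmap: one page worth of doorbells is managed
 * dynamically, with the first AMDGPU_MES_PRIORITY_NUM_LEVELS entries
 * reserved up front for the aggregated doorbells.
 */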
static int amdgpu_mes_doorbell_init(struct amdgpu_device *adev)
{
	int i;
	struct amdgpu_mes *mes = &adev->mes;

	/* Bitmap for dynamic allocation of kernel doorbells */
	mes->doorbell_bitmap = bitmap_zalloc(PAGE_SIZE / sizeof(u32), GFP_KERNEL);
	if (!mes->doorbell_bitmap) {
		DRM_ERROR("Failed to allocate MES doorbell bitmap\n");
		return -ENOMEM;
	}

	mes->num_mes_dbs = PAGE_SIZE / AMDGPU_ONE_DOORBELL_SIZE;
	for (i = 0; i < AMDGPU_MES_PRIORITY_NUM_LEVELS; i++) {
		adev->mes.aggregated_doorbells[i] = mes->db_start_dw_offset + i * 2;
		set_bit(i, mes->doorbell_bitmap);
	}

	return 0;
}

static void amdgpu_mes_doorbell_free(struct amdgpu_device *adev)
{
	bitmap_free(adev->mes.doorbell_bitmap);
}

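/*
 * One-time MES software init: set up the IDRs, locks and HQD masks used by
 * the scheduler, allocate writeback slots for the scheduler context, the
 * query-status fence and register reads, and initialize the doorbells.
 */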
int amdgpu_mes_init(struct amdgpu_device *adev)
{
	int i, r;

	adev->mes.adev = adev;

	idr_init(&adev->mes.pasid_idr);
	idr_init(&adev->mes.gang_id_idr);
	idr_init(&adev->mes.queue_id_idr);
	ida_init(&adev->mes.doorbell_ida);
	spin_lock_init(&adev->mes.queue_id_lock);
	spin_lock_init(&adev->mes.ring_lock);
	mutex_init(&adev->mes.mutex_hidden);

	adev->mes.total_max_queue = AMDGPU_FENCE_MES_QUEUE_ID_MASK;
	adev->mes.vmid_mask_mmhub = 0xffffff00;
	adev->mes.vmid_mask_gfxhub = 0xffffff00;

	for (i = 0; i < AMDGPU_MES_MAX_COMPUTE_PIPES; i++) {
		/* use only 1st MEC pipes */
		if (i >= 4)
			continue;
		adev->mes.compute_hqd_mask[i] = 0xc;
	}

	for (i = 0; i < AMDGPU_MES_MAX_GFX_PIPES; i++)
		adev->mes.gfx_hqd_mask[i] = i ? 0 : 0xfffffffe;

	for (i = 0; i < AMDGPU_MES_MAX_SDMA_PIPES; i++) {
		if (adev->ip_versions[SDMA0_HWIP][0] < IP_VERSION(6, 0, 0))
			adev->mes.sdma_hqd_mask[i] = i ? 0 : 0x3fc;
		/* zero sdma_hqd_mask for non-existent engine */
		else if (adev->sdma.num_instances == 1)
			adev->mes.sdma_hqd_mask[i] = i ? 0 : 0xfc;
		else
			adev->mes.sdma_hqd_mask[i] = 0xfc;
	}

	r = amdgpu_device_wb_get(adev, &adev->mes.sch_ctx_offs);
	if (r) {
		dev_err(adev->dev,
			"(%d) sch_ctx_offs wb alloc failed\n", r);
		goto error_ids;
	}
	adev->mes.sch_ctx_gpu_addr =
		adev->wb.gpu_addr + (adev->mes.sch_ctx_offs * 4);
	adev->mes.sch_ctx_ptr =
		(uint64_t *)&adev->wb.wb[adev->mes.sch_ctx_offs];

	r = amdgpu_device_wb_get(adev, &adev->mes.query_status_fence_offs);
	if (r) {
		amdgpu_device_wb_free(adev, adev->mes.sch_ctx_offs);
		dev_err(adev->dev,
			"(%d) query_status_fence_offs wb alloc failed\n", r);
		goto error_ids;
	}
	adev->mes.query_status_fence_gpu_addr =
		adev->wb.gpu_addr + (adev->mes.query_status_fence_offs * 4);
	adev->mes.query_status_fence_ptr =
		(uint64_t *)&adev->wb.wb[adev->mes.query_status_fence_offs];

	r = amdgpu_device_wb_get(adev, &adev->mes.read_val_offs);
	if (r) {
		amdgpu_device_wb_free(adev, adev->mes.sch_ctx_offs);
		amdgpu_device_wb_free(adev, adev->mes.query_status_fence_offs);
		dev_err(adev->dev,
			"(%d) read_val_offs alloc failed\n", r);
		goto error_ids;
	}
	adev->mes.read_val_gpu_addr =
		adev->wb.gpu_addr + (adev->mes.read_val_offs * 4);
	adev->mes.read_val_ptr =
		(uint32_t *)&adev->wb.wb[adev->mes.read_val_offs];

	r = amdgpu_mes_doorbell_init(adev);
	if (r)
		goto error;

	return 0;

error:
	amdgpu_device_wb_free(adev, adev->mes.sch_ctx_offs);
	amdgpu_device_wb_free(adev, adev->mes.query_status_fence_offs);
	amdgpu_device_wb_free(adev, adev->mes.read_val_offs);
error_ids:
	idr_destroy(&adev->mes.pasid_idr);
	idr_destroy(&adev->mes.gang_id_idr);
	idr_destroy(&adev->mes.queue_id_idr);
	ida_destroy(&adev->mes.doorbell_ida);
	mutex_destroy(&adev->mes.mutex_hidden);
	return r;
}

void amdgpu_mes_fini(struct amdgpu_device *adev)
{
	amdgpu_device_wb_free(adev, adev->mes.sch_ctx_offs);
	amdgpu_device_wb_free(adev, adev->mes.query_status_fence_offs);
	amdgpu_device_wb_free(adev, adev->mes.read_val_offs);
	amdgpu_mes_doorbell_free(adev);

	idr_destroy(&adev->mes.pasid_idr);
	idr_destroy(&adev->mes.gang_id_idr);
	idr_destroy(&adev->mes.queue_id_idr);
	ida_destroy(&adev->mes.doorbell_ida);
	mutex_destroy(&adev->mes.mutex_hidden);
}

static void amdgpu_mes_queue_free_mqd(struct amdgpu_mes_queue *q)
{
	amdgpu_bo_free_kernel(&q->mqd_obj,
			      &q->mqd_gpu_addr,
			      &q->mqd_cpu_ptr);
}

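/*
 * Create an MES process for the given PASID: allocate and zero the process
 * context BO, register the process in the pasid IDR and inherit the default
 * process quantum and the VM page directory address.
 */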
int amdgpu_mes_create_process(struct amdgpu_device *adev, int pasid,
			      struct amdgpu_vm *vm)
{
	struct amdgpu_mes_process *process;
	int r;

	/* allocate the mes process buffer */
	process = kzalloc(sizeof(struct amdgpu_mes_process), GFP_KERNEL);
	if (!process) {
		DRM_ERROR("no more memory to create mes process\n");
		return -ENOMEM;
	}

	/* allocate the process context bo and map it */
	r = amdgpu_bo_create_kernel(adev, AMDGPU_MES_PROC_CTX_SIZE, PAGE_SIZE,
				    AMDGPU_GEM_DOMAIN_GTT,
				    &process->proc_ctx_bo,
				    &process->proc_ctx_gpu_addr,
				    &process->proc_ctx_cpu_ptr);
	if (r) {
		DRM_ERROR("failed to allocate process context bo\n");
		goto clean_up_memory;
	}
	memset(process->proc_ctx_cpu_ptr, 0, AMDGPU_MES_PROC_CTX_SIZE);

	/*
	 * Avoid taking any other locks under MES lock to avoid circular
	 * lock dependencies.
	 */
	amdgpu_mes_lock(&adev->mes);

	/* add the mes process to idr list */
	r = idr_alloc(&adev->mes.pasid_idr, process, pasid, pasid + 1,
		      GFP_KERNEL);
	if (r < 0) {
		DRM_ERROR("failed to lock pasid=%d\n", pasid);
		goto clean_up_ctx;
	}

	INIT_LIST_HEAD(&process->gang_list);
	process->vm = vm;
	process->pasid = pasid;
	process->process_quantum = adev->mes.default_process_quantum;
	process->pd_gpu_addr = amdgpu_bo_gpu_offset(vm->root.bo);

	amdgpu_mes_unlock(&adev->mes);
	return 0;

clean_up_ctx:
	amdgpu_mes_unlock(&adev->mes);
	amdgpu_bo_free_kernel(&process->proc_ctx_bo,
			      &process->proc_ctx_gpu_addr,
			      &process->proc_ctx_cpu_ptr);
clean_up_memory:
	kfree(process);
	return r;
}

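/*
 * Tear down an MES process: under the MES lock, remove every queue of every
 * gang from the hardware and drop the IDR entries, then release the MQDs,
 * gang contexts and the process context outside the lock.
 */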
void amdgpu_mes_destroy_process(struct amdgpu_device *adev, int pasid)
{
	struct amdgpu_mes_process *process;
	struct amdgpu_mes_gang *gang, *tmp1;
	struct amdgpu_mes_queue *queue, *tmp2;
	struct mes_remove_queue_input queue_input;
	unsigned long flags;
	int r;

	/*
	 * Avoid taking any other locks under MES lock to avoid circular
	 * lock dependencies.
	 */
	amdgpu_mes_lock(&adev->mes);

	process = idr_find(&adev->mes.pasid_idr, pasid);
	if (!process) {
		DRM_WARN("pasid %d doesn't exist\n", pasid);
		amdgpu_mes_unlock(&adev->mes);
		return;
	}

	/* Remove all queues from hardware */
	list_for_each_entry_safe(gang, tmp1, &process->gang_list, list) {
		list_for_each_entry_safe(queue, tmp2, &gang->queue_list, list) {
			spin_lock_irqsave(&adev->mes.queue_id_lock, flags);
			idr_remove(&adev->mes.queue_id_idr, queue->queue_id);
			spin_unlock_irqrestore(&adev->mes.queue_id_lock, flags);

			queue_input.doorbell_offset = queue->doorbell_off;
			queue_input.gang_context_addr = gang->gang_ctx_gpu_addr;

			r = adev->mes.funcs->remove_hw_queue(&adev->mes,
							     &queue_input);
			if (r)
				DRM_WARN("failed to remove hardware queue\n");
		}

		idr_remove(&adev->mes.gang_id_idr, gang->gang_id);
	}

	idr_remove(&adev->mes.pasid_idr, pasid);
	amdgpu_mes_unlock(&adev->mes);

	/* free all memory allocated by the process */
	list_for_each_entry_safe(gang, tmp1, &process->gang_list, list) {
		/* free all queues in the gang */
		list_for_each_entry_safe(queue, tmp2, &gang->queue_list, list) {
			amdgpu_mes_queue_free_mqd(queue);
			list_del(&queue->list);
			kfree(queue);
		}
		amdgpu_bo_free_kernel(&gang->gang_ctx_bo,
				      &gang->gang_ctx_gpu_addr,
				      &gang->gang_ctx_cpu_ptr);
		list_del(&gang->list);
		kfree(gang);
	}
	amdgpu_bo_free_kernel(&process->proc_ctx_bo,
			      &process->proc_ctx_gpu_addr,
			      &process->proc_ctx_cpu_ptr);
	kfree(process);
}

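/*
 * Create a gang under an existing process: allocate and zero the gang
 * context BO, look the process up by PASID under the MES lock, assign a
 * gang id from the gang IDR and link the gang into the process' gang list.
 */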
int amdgpu_mes_add_gang(struct amdgpu_device *adev, int pasid,
			struct amdgpu_mes_gang_properties *gprops,
			int *gang_id)
{
	struct amdgpu_mes_process *process;
	struct amdgpu_mes_gang *gang;
	int r;

	/* allocate the mes gang buffer */
	gang = kzalloc(sizeof(struct amdgpu_mes_gang), GFP_KERNEL);
	if (!gang)
		return -ENOMEM;

	/* allocate the gang context bo and map it to cpu space */
	r = amdgpu_bo_create_kernel(adev, AMDGPU_MES_GANG_CTX_SIZE, PAGE_SIZE,
				    AMDGPU_GEM_DOMAIN_GTT,
				    &gang->gang_ctx_bo,
				    &gang->gang_ctx_gpu_addr,
				    &gang->gang_ctx_cpu_ptr);
	if (r) {
		DRM_ERROR("failed to allocate gang context bo\n");
		goto clean_up_mem;
	}
	memset(gang->gang_ctx_cpu_ptr, 0, AMDGPU_MES_GANG_CTX_SIZE);

	/*
	 * Avoid taking any other locks under MES lock to avoid circular
	 * lock dependencies.
	 */
	amdgpu_mes_lock(&adev->mes);

	process = idr_find(&adev->mes.pasid_idr, pasid);
	if (!process) {
		DRM_ERROR("pasid %d doesn't exist\n", pasid);
		r = -EINVAL;
		goto clean_up_ctx;
	}

	/* add the mes gang to idr list */
	r = idr_alloc(&adev->mes.gang_id_idr, gang, 1, 0,
		      GFP_KERNEL);
	if (r < 0) {
		DRM_ERROR("failed to allocate idr for gang\n");
		goto clean_up_ctx;
	}

	gang->gang_id = r;
	*gang_id = r;

	INIT_LIST_HEAD(&gang->queue_list);
	gang->process = process;
	gang->priority = gprops->priority;
	gang->gang_quantum = gprops->gang_quantum ?
		gprops->gang_quantum : adev->mes.default_gang_quantum;
	gang->global_priority_level = gprops->global_priority_level;
	gang->inprocess_gang_priority = gprops->inprocess_gang_priority;
	list_add_tail(&gang->list, &process->gang_list);

	amdgpu_mes_unlock(&adev->mes);
	return 0;

clean_up_ctx:
	amdgpu_mes_unlock(&adev->mes);
	amdgpu_bo_free_kernel(&gang->gang_ctx_bo,
			      &gang->gang_ctx_gpu_addr,
			      &gang->gang_ctx_cpu_ptr);
clean_up_mem:
	kfree(gang);
	return r;
}

int amdgpu_mes_remove_gang(struct amdgpu_device *adev, int gang_id)
{
	struct amdgpu_mes_gang *gang;

	/*
	 * Avoid taking any other locks under MES lock to avoid circular
	 * lock dependencies.
	 */
	amdgpu_mes_lock(&adev->mes);

	gang = idr_find(&adev->mes.gang_id_idr, gang_id);
	if (!gang) {
		DRM_ERROR("gang id %d doesn't exist\n", gang_id);
		amdgpu_mes_unlock(&adev->mes);
		return -EINVAL;
	}

	if (!list_empty(&gang->queue_list)) {
		DRM_ERROR("queue list is not empty\n");
		amdgpu_mes_unlock(&adev->mes);
		return -EBUSY;
	}

	idr_remove(&adev->mes.gang_id_idr, gang->gang_id);
	list_del(&gang->list);
	amdgpu_mes_unlock(&adev->mes);

	amdgpu_bo_free_kernel(&gang->gang_ctx_bo,
			      &gang->gang_ctx_gpu_addr,
			      &gang->gang_ctx_cpu_ptr);

	kfree(gang);

	return 0;
}

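/*
 * Ask the MES firmware to suspend every gang of every known process.
 * Failures are only reported; the walk continues.
 */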
int amdgpu_mes_suspend(struct amdgpu_device *adev)
{
	struct idr *idp;
	struct amdgpu_mes_process *process;
	struct amdgpu_mes_gang *gang;
	struct mes_suspend_gang_input input;
	int r, pasid;

	/*
	 * Avoid taking any other locks under MES lock to avoid circular
	 * lock dependencies.
	 */
	amdgpu_mes_lock(&adev->mes);

	idp = &adev->mes.pasid_idr;

	idr_for_each_entry(idp, process, pasid) {
		list_for_each_entry(gang, &process->gang_list, list) {
			r = adev->mes.funcs->suspend_gang(&adev->mes, &input);
			if (r)
				DRM_ERROR("failed to suspend pasid %d gangid %d",
					 pasid, gang->gang_id);
		}
	}

	amdgpu_mes_unlock(&adev->mes);
	return 0;
}

int amdgpu_mes_resume(struct amdgpu_device *adev)
{
	struct idr *idp;
	struct amdgpu_mes_process *process;
	struct amdgpu_mes_gang *gang;
	struct mes_resume_gang_input input;
	int r, pasid;

	/*
	 * Avoid taking any other locks under MES lock to avoid circular
	 * lock dependencies.
	 */
	amdgpu_mes_lock(&adev->mes);

	idp = &adev->mes.pasid_idr;

	idr_for_each_entry(idp, process, pasid) {
		list_for_each_entry(gang, &process->gang_list, list) {
			r = adev->mes.funcs->resume_gang(&adev->mes, &input);
			if (r)
				DRM_ERROR("failed to resume pasid %d gangid %d",
					 pasid, gang->gang_id);
		}
	}

	amdgpu_mes_unlock(&adev->mes);
	return 0;
}

static int amdgpu_mes_queue_alloc_mqd(struct amdgpu_device *adev,
				     struct amdgpu_mes_queue *q,
				     struct amdgpu_mes_queue_properties *p)
{
	struct amdgpu_mqd *mqd_mgr = &adev->mqds[p->queue_type];
	u32 mqd_size = mqd_mgr->mqd_size;
	int r;

	r = amdgpu_bo_create_kernel(adev, mqd_size, PAGE_SIZE,
				    AMDGPU_GEM_DOMAIN_GTT,
				    &q->mqd_obj,
				    &q->mqd_gpu_addr, &q->mqd_cpu_ptr);
	if (r) {
		dev_warn(adev->dev, "failed to create queue mqd bo (%d)", r);
		return r;
	}
	memset(q->mqd_cpu_ptr, 0, mqd_size);

	r = amdgpu_bo_reserve(q->mqd_obj, false);
	if (unlikely(r != 0))
		goto clean_up;

	return 0;

clean_up:
	amdgpu_bo_free_kernel(&q->mqd_obj,
			      &q->mqd_gpu_addr,
			      &q->mqd_cpu_ptr);
	return r;
}

static void amdgpu_mes_queue_init_mqd(struct amdgpu_device *adev,
				     struct amdgpu_mes_queue *q,
				     struct amdgpu_mes_queue_properties *p)
{
	struct amdgpu_mqd *mqd_mgr = &adev->mqds[p->queue_type];
	struct amdgpu_mqd_prop mqd_prop = {0};

	mqd_prop.mqd_gpu_addr = q->mqd_gpu_addr;
	mqd_prop.hqd_base_gpu_addr = p->hqd_base_gpu_addr;
	mqd_prop.rptr_gpu_addr = p->rptr_gpu_addr;
	mqd_prop.wptr_gpu_addr = p->wptr_gpu_addr;
	mqd_prop.queue_size = p->queue_size;
	mqd_prop.use_doorbell = true;
	mqd_prop.doorbell_index = p->doorbell_off;
	mqd_prop.eop_gpu_addr = p->eop_gpu_addr;
	mqd_prop.hqd_pipe_priority = p->hqd_pipe_priority;
	mqd_prop.hqd_queue_priority = p->hqd_queue_priority;
	mqd_prop.hqd_active = false;

	if (p->queue_type == AMDGPU_RING_TYPE_GFX ||
	    p->queue_type == AMDGPU_RING_TYPE_COMPUTE) {
		mutex_lock(&adev->srbm_mutex);
		amdgpu_gfx_select_me_pipe_q(adev, p->ring->me, p->ring->pipe, 0, 0, 0);
	}

	mqd_mgr->init_mqd(adev, q->mqd_cpu_ptr, &mqd_prop);

	if (p->queue_type == AMDGPU_RING_TYPE_GFX ||
	    p->queue_type == AMDGPU_RING_TYPE_COMPUTE) {
		amdgpu_gfx_select_me_pipe_q(adev, 0, 0, 0, 0, 0);
		mutex_unlock(&adev->srbm_mutex);
	}

	amdgpu_bo_unreserve(q->mqd_obj);
}

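/*
 * Create a hardware queue on a gang: allocate and initialize the MQD,
 * assign a queue id and a kernel doorbell, then submit an ADD_QUEUE
 * request to the MES firmware with the process, gang and queue parameters.
 */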
int amdgpu_mes_add_hw_queue(struct amdgpu_device *adev, int gang_id,
			    struct amdgpu_mes_queue_properties *qprops,
			    int *queue_id)
{
	struct amdgpu_mes_queue *queue;
	struct amdgpu_mes_gang *gang;
	struct mes_add_queue_input queue_input;
	unsigned long flags;
	int r;

	memset(&queue_input, 0, sizeof(struct mes_add_queue_input));

	/* allocate the mes queue buffer */
	queue = kzalloc(sizeof(struct amdgpu_mes_queue), GFP_KERNEL);
	if (!queue) {
		DRM_ERROR("Failed to allocate memory for queue\n");
		return -ENOMEM;
	}

	/* Allocate the queue mqd */
	r = amdgpu_mes_queue_alloc_mqd(adev, queue, qprops);
	if (r)
		goto clean_up_memory;

	/*
	 * Avoid taking any other locks under MES lock to avoid circular
	 * lock dependencies.
	 */
	amdgpu_mes_lock(&adev->mes);

	gang = idr_find(&adev->mes.gang_id_idr, gang_id);
	if (!gang) {
		DRM_ERROR("gang id %d doesn't exist\n", gang_id);
		r = -EINVAL;
		goto clean_up_mqd;
	}

	/* add the mes queue to idr list */
	spin_lock_irqsave(&adev->mes.queue_id_lock, flags);
	r = idr_alloc(&adev->mes.queue_id_idr, queue, 1, 0,
		      GFP_ATOMIC);
	if (r < 0) {
		spin_unlock_irqrestore(&adev->mes.queue_id_lock, flags);
		goto clean_up_mqd;
	}
	spin_unlock_irqrestore(&adev->mes.queue_id_lock, flags);
	*queue_id = queue->queue_id = r;

	/* allocate a doorbell index for the queue */
	r = amdgpu_mes_kernel_doorbell_get(adev, gang->process,
					  qprops->queue_type,
					  &qprops->doorbell_off);
	if (r)
		goto clean_up_queue_id;

	/* initialize the queue mqd */
	amdgpu_mes_queue_init_mqd(adev, queue, qprops);

	/* add hw queue to mes */
	queue_input.process_id = gang->process->pasid;

	queue_input.page_table_base_addr =
		adev->vm_manager.vram_base_offset + gang->process->pd_gpu_addr -
		adev->gmc.vram_start;

	queue_input.process_va_start = 0;
	queue_input.process_va_end =
		(adev->vm_manager.max_pfn - 1) << AMDGPU_GPU_PAGE_SHIFT;
	queue_input.process_quantum = gang->process->process_quantum;
	queue_input.process_context_addr = gang->process->proc_ctx_gpu_addr;
	queue_input.gang_quantum = gang->gang_quantum;
	queue_input.gang_context_addr = gang->gang_ctx_gpu_addr;
	queue_input.inprocess_gang_priority = gang->inprocess_gang_priority;
	queue_input.gang_global_priority_level = gang->global_priority_level;
	queue_input.doorbell_offset = qprops->doorbell_off;
	queue_input.mqd_addr = queue->mqd_gpu_addr;
	queue_input.wptr_addr = qprops->wptr_gpu_addr;
	queue_input.wptr_mc_addr = qprops->wptr_mc_addr;
	queue_input.queue_type = qprops->queue_type;
	queue_input.paging = qprops->paging;
	queue_input.is_kfd_process = 0;

	r = adev->mes.funcs->add_hw_queue(&adev->mes, &queue_input);
	if (r) {
		DRM_ERROR("failed to add hardware queue to MES, doorbell=0x%llx\n",
			  qprops->doorbell_off);
		goto clean_up_doorbell;
	}

	DRM_DEBUG("MES hw queue was added, pasid=%d, gang id=%d, "
		  "queue type=%d, doorbell=0x%llx\n",
		  gang->process->pasid, gang_id, qprops->queue_type,
		  qprops->doorbell_off);

	queue->ring = qprops->ring;
	queue->doorbell_off = qprops->doorbell_off;
	queue->wptr_gpu_addr = qprops->wptr_gpu_addr;
	queue->queue_type = qprops->queue_type;
	queue->paging = qprops->paging;
	queue->gang = gang;
	queue->ring->mqd_ptr = queue->mqd_cpu_ptr;
	list_add_tail(&queue->list, &gang->queue_list);

	amdgpu_mes_unlock(&adev->mes);
	return 0;

clean_up_doorbell:
	amdgpu_mes_kernel_doorbell_free(adev, gang->process,
				       qprops->doorbell_off);
clean_up_queue_id:
	spin_lock_irqsave(&adev->mes.queue_id_lock, flags);
	idr_remove(&adev->mes.queue_id_idr, queue->queue_id);
	spin_unlock_irqrestore(&adev->mes.queue_id_lock, flags);
clean_up_mqd:
	amdgpu_mes_unlock(&adev->mes);
	amdgpu_mes_queue_free_mqd(queue);
clean_up_memory:
	kfree(queue);
	return r;
}

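/*
 * Remove a hardware queue: drop it from the queue IDR, ask the MES firmware
 * to remove it, release its doorbell, then free the MQD and the queue.
 */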
int amdgpu_mes_remove_hw_queue(struct amdgpu_device *adev, int queue_id)
{
	unsigned long flags;
	struct amdgpu_mes_queue *queue;
	struct amdgpu_mes_gang *gang;
	struct mes_remove_queue_input queue_input;
	int r;

	/*
	 * Avoid taking any other locks under MES lock to avoid circular
	 * lock dependencies.
	 */
	amdgpu_mes_lock(&adev->mes);

	/* remove the mes queue from idr list */
	spin_lock_irqsave(&adev->mes.queue_id_lock, flags);

	queue = idr_find(&adev->mes.queue_id_idr, queue_id);
	if (!queue) {
		spin_unlock_irqrestore(&adev->mes.queue_id_lock, flags);
		amdgpu_mes_unlock(&adev->mes);
		DRM_ERROR("queue id %d doesn't exist\n", queue_id);
		return -EINVAL;
	}

	idr_remove(&adev->mes.queue_id_idr, queue_id);
	spin_unlock_irqrestore(&adev->mes.queue_id_lock, flags);

	DRM_DEBUG("try to remove queue, doorbell off = 0x%llx\n",
		  queue->doorbell_off);

	gang = queue->gang;
	queue_input.doorbell_offset = queue->doorbell_off;
	queue_input.gang_context_addr = gang->gang_ctx_gpu_addr;

	r = adev->mes.funcs->remove_hw_queue(&adev->mes, &queue_input);
	if (r)
		DRM_ERROR("failed to remove hardware queue, queue id = %d\n",
			  queue_id);

	list_del(&queue->list);
	amdgpu_mes_kernel_doorbell_free(adev, gang->process,
				       queue->doorbell_off);
	amdgpu_mes_unlock(&adev->mes);

	amdgpu_mes_queue_free_mqd(queue);
	kfree(queue);
	return 0;
}

int amdgpu_mes_unmap_legacy_queue(struct amdgpu_device *adev,
				  struct amdgpu_ring *ring,
				  enum amdgpu_unmap_queues_action action,
				  u64 gpu_addr, u64 seq)
{
	struct mes_unmap_legacy_queue_input queue_input;
	int r;

	queue_input.action = action;
	queue_input.queue_type = ring->funcs->type;
	queue_input.doorbell_offset = ring->doorbell_index;
	queue_input.pipe_id = ring->pipe;
	queue_input.queue_id = ring->queue;
	queue_input.trail_fence_addr = gpu_addr;
	queue_input.trail_fence_data = seq;

	r = adev->mes.funcs->unmap_legacy_queue(&adev->mes, &queue_input);
	if (r)
		DRM_ERROR("failed to unmap legacy queue\n");

	return r;
}

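/*
 * Register access helpers routed through the MES firmware's MISC_OP
 * interface: read or write a register, or write one register and wait for
 * another to match a reference value under a mask.
 */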
uint32_t amdgpu_mes_rreg(struct amdgpu_device *adev, uint32_t reg)
{
	struct mes_misc_op_input op_input;
	int r, val = 0;

	op_input.op = MES_MISC_OP_READ_REG;
	op_input.read_reg.reg_offset = reg;
	op_input.read_reg.buffer_addr = adev->mes.read_val_gpu_addr;

	if (!adev->mes.funcs->misc_op) {
		DRM_ERROR("mes rreg is not supported!\n");
		goto error;
	}

	r = adev->mes.funcs->misc_op(&adev->mes, &op_input);
	if (r)
		DRM_ERROR("failed to read reg (0x%x)\n", reg);
	else
		val = *(adev->mes.read_val_ptr);

error:
	return val;
}

int amdgpu_mes_wreg(struct amdgpu_device *adev,
		    uint32_t reg, uint32_t val)
{
	struct mes_misc_op_input op_input;
	int r;

	op_input.op = MES_MISC_OP_WRITE_REG;
	op_input.write_reg.reg_offset = reg;
	op_input.write_reg.reg_value = val;

	if (!adev->mes.funcs->misc_op) {
		DRM_ERROR("mes wreg is not supported!\n");
		r = -EINVAL;
		goto error;
	}

	r = adev->mes.funcs->misc_op(&adev->mes, &op_input);
	if (r)
		DRM_ERROR("failed to write reg (0x%x)\n", reg);

error:
	return r;
}

int amdgpu_mes_reg_write_reg_wait(struct amdgpu_device *adev,
				  uint32_t reg0, uint32_t reg1,
				  uint32_t ref, uint32_t mask)
{
	struct mes_misc_op_input op_input;
	int r;

	op_input.op = MES_MISC_OP_WRM_REG_WR_WAIT;
	op_input.wrm_reg.reg0 = reg0;
	op_input.wrm_reg.reg1 = reg1;
	op_input.wrm_reg.ref = ref;
	op_input.wrm_reg.mask = mask;

	if (!adev->mes.funcs->misc_op) {
		DRM_ERROR("mes reg_write_reg_wait is not supported!\n");
		r = -EINVAL;
		goto error;
	}

	r = adev->mes.funcs->misc_op(&adev->mes, &op_input);
	if (r)
		DRM_ERROR("failed to reg_write_reg_wait\n");

error:
	return r;
}

int amdgpu_mes_reg_wait(struct amdgpu_device *adev, uint32_t reg,
			uint32_t val, uint32_t mask)
{
	struct mes_misc_op_input op_input;
	int r;

	op_input.op = MES_MISC_OP_WRM_REG_WAIT;
	op_input.wrm_reg.reg0 = reg;
	op_input.wrm_reg.ref = val;
	op_input.wrm_reg.mask = mask;

	if (!adev->mes.funcs->misc_op) {
		DRM_ERROR("mes reg wait is not supported!\n");
		r = -EINVAL;
		goto error;
	}

	r = adev->mes.funcs->misc_op(&adev->mes, &op_input);
	if (r)
		DRM_ERROR("failed to reg_wait\n");

error:
	return r;
}

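/*
 * Program the per-process shader debugger state through the MES
 * SET_SHADER_DEBUGGER misc op.  Callers that only need a process context
 * flush must use amdgpu_mes_flush_shader_debugger() instead.
 */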
int amdgpu_mes_set_shader_debugger(struct amdgpu_device *adev,
				uint64_t process_context_addr,
				uint32_t spi_gdbg_per_vmid_cntl,
				const uint32_t *tcp_watch_cntl,
				uint32_t flags,
				bool trap_en)
{
	struct mes_misc_op_input op_input = {0};
	int r;

	if (!adev->mes.funcs->misc_op) {
		DRM_ERROR("mes set shader debugger is not supported!\n");
		return -EINVAL;
	}

	op_input.op = MES_MISC_OP_SET_SHADER_DEBUGGER;
	op_input.set_shader_debugger.process_context_addr = process_context_addr;
	op_input.set_shader_debugger.flags.u32all = flags;

	/* use amdgpu mes_flush_shader_debugger instead */
	if (op_input.set_shader_debugger.flags.process_ctx_flush)
		return -EINVAL;

	op_input.set_shader_debugger.spi_gdbg_per_vmid_cntl = spi_gdbg_per_vmid_cntl;
	memcpy(op_input.set_shader_debugger.tcp_watch_cntl, tcp_watch_cntl,
			sizeof(op_input.set_shader_debugger.tcp_watch_cntl));

	if (((adev->mes.sched_version & AMDGPU_MES_API_VERSION_MASK) >>
			AMDGPU_MES_API_VERSION_SHIFT) >= 14)
		op_input.set_shader_debugger.trap_en = trap_en;

	amdgpu_mes_lock(&adev->mes);

	r = adev->mes.funcs->misc_op(&adev->mes, &op_input);
	if (r)
		DRM_ERROR("failed to set_shader_debugger\n");

	amdgpu_mes_unlock(&adev->mes);

	return r;
}

int amdgpu_mes_flush_shader_debugger(struct amdgpu_device *adev,
				     uint64_t process_context_addr)
{
	struct mes_misc_op_input op_input = {0};
	int r;

	if (!adev->mes.funcs->misc_op) {
		DRM_ERROR("mes flush shader debugger is not supported!\n");
		return -EINVAL;
	}

	op_input.op = MES_MISC_OP_SET_SHADER_DEBUGGER;
	op_input.set_shader_debugger.process_context_addr = process_context_addr;
	op_input.set_shader_debugger.flags.process_ctx_flush = true;

	amdgpu_mes_lock(&adev->mes);

	r = adev->mes.funcs->misc_op(&adev->mes, &op_input);
	if (r)
		DRM_ERROR("failed to set_shader_debugger\n");

	amdgpu_mes_unlock(&adev->mes);

	return r;
}

static void
amdgpu_mes_ring_to_queue_props(struct amdgpu_device *adev,
			       struct amdgpu_ring *ring,
			       struct amdgpu_mes_queue_properties *props)
{
	props->queue_type = ring->funcs->type;
	props->hqd_base_gpu_addr = ring->gpu_addr;
	props->rptr_gpu_addr = ring->rptr_gpu_addr;
	props->wptr_gpu_addr = ring->wptr_gpu_addr;
	props->wptr_mc_addr =
		ring->mes_ctx->meta_data_mc_addr + ring->wptr_offs;
	props->queue_size = ring->ring_size;
	props->eop_gpu_addr = ring->eop_gpu_addr;
	props->hqd_pipe_priority = AMDGPU_GFX_PIPE_PRIO_NORMAL;
	props->hqd_queue_priority = AMDGPU_GFX_QUEUE_PRIORITY_MINIMUM;
	props->paging = false;
	props->ring = ring;
}

#define DEFINE_AMDGPU_MES_CTX_GET_OFFS_ENG(_eng)			\
do {									\
	if (id_offs < AMDGPU_MES_CTX_MAX_OFFS)				\
		return offsetof(struct amdgpu_mes_ctx_meta_data,	\
				_eng[ring->idx].slots[id_offs]);        \
	else if (id_offs == AMDGPU_MES_CTX_RING_OFFS)			\
		return offsetof(struct amdgpu_mes_ctx_meta_data,        \
				_eng[ring->idx].ring);                  \
	else if (id_offs == AMDGPU_MES_CTX_IB_OFFS)			\
		return offsetof(struct amdgpu_mes_ctx_meta_data,        \
				_eng[ring->idx].ib);                    \
	else if (id_offs == AMDGPU_MES_CTX_PADDING_OFFS)		\
		return offsetof(struct amdgpu_mes_ctx_meta_data,        \
				_eng[ring->idx].padding);               \
} while (0)

int amdgpu_mes_ctx_get_offs(struct amdgpu_ring *ring, unsigned int id_offs)
{
	switch (ring->funcs->type) {
	case AMDGPU_RING_TYPE_GFX:
		DEFINE_AMDGPU_MES_CTX_GET_OFFS_ENG(gfx);
		break;
	case AMDGPU_RING_TYPE_COMPUTE:
		DEFINE_AMDGPU_MES_CTX_GET_OFFS_ENG(compute);
		break;
	case AMDGPU_RING_TYPE_SDMA:
		DEFINE_AMDGPU_MES_CTX_GET_OFFS_ENG(sdma);
		break;
	default:
		break;
	}

	WARN_ON(1);
	return -EINVAL;
}

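/*
 * Create a software ring backed by an MES hardware queue: pick up the ring
 * functions of the matching engine, initialize the ring, build the queue
 * properties from it and add the hardware queue on the given gang.
 */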
amdgpu_mes_add_ring(struct amdgpu_device * adev,int gang_id,int queue_type,int idx,struct amdgpu_mes_ctx_data * ctx_data,struct amdgpu_ring ** out)993d0c423b6SJack Xiao int amdgpu_mes_add_ring(struct amdgpu_device *adev, int gang_id,
994d0c423b6SJack Xiao 			int queue_type, int idx,
995d0c423b6SJack Xiao 			struct amdgpu_mes_ctx_data *ctx_data,
996d0c423b6SJack Xiao 			struct amdgpu_ring **out)
997d0c423b6SJack Xiao {
998d0c423b6SJack Xiao 	struct amdgpu_ring *ring;
999d0c423b6SJack Xiao 	struct amdgpu_mes_gang *gang;
1000d0c423b6SJack Xiao 	struct amdgpu_mes_queue_properties qprops = {0};
1001d0c423b6SJack Xiao 	int r, queue_id, pasid;
1002d0c423b6SJack Xiao 
100318ee4ce6SJack Xiao 	/*
100418ee4ce6SJack Xiao 	 * Avoid taking any other locks under MES lock to avoid circular
100518ee4ce6SJack Xiao 	 * lock dependencies.
100618ee4ce6SJack Xiao 	 */
100718ee4ce6SJack Xiao 	amdgpu_mes_lock(&adev->mes);
1008d0c423b6SJack Xiao 	gang = idr_find(&adev->mes.gang_id_idr, gang_id);
1009d0c423b6SJack Xiao 	if (!gang) {
1010d0c423b6SJack Xiao 		DRM_ERROR("gang id %d doesn't exist\n", gang_id);
101118ee4ce6SJack Xiao 		amdgpu_mes_unlock(&adev->mes);
1012d0c423b6SJack Xiao 		return -EINVAL;
1013d0c423b6SJack Xiao 	}
1014d0c423b6SJack Xiao 	pasid = gang->process->pasid;
1015d0c423b6SJack Xiao 
1016d0c423b6SJack Xiao 	ring = kzalloc(sizeof(struct amdgpu_ring), GFP_KERNEL);
1017d0c423b6SJack Xiao 	if (!ring) {
101818ee4ce6SJack Xiao 		amdgpu_mes_unlock(&adev->mes);
1019d0c423b6SJack Xiao 		return -ENOMEM;
1020d0c423b6SJack Xiao 	}
1021d0c423b6SJack Xiao 
1022d0c423b6SJack Xiao 	ring->ring_obj = NULL;
1023d0c423b6SJack Xiao 	ring->use_doorbell = true;
1024d0c423b6SJack Xiao 	ring->is_mes_queue = true;
1025d0c423b6SJack Xiao 	ring->mes_ctx = ctx_data;
1026d0c423b6SJack Xiao 	ring->idx = idx;
1027d0c423b6SJack Xiao 	ring->no_scheduler = true;
1028d0c423b6SJack Xiao 
1029d0c423b6SJack Xiao 	if (queue_type == AMDGPU_RING_TYPE_COMPUTE) {
1030d0c423b6SJack Xiao 		int offset = offsetof(struct amdgpu_mes_ctx_meta_data,
1031d0c423b6SJack Xiao 				      compute[ring->idx].mec_hpd);
1032d0c423b6SJack Xiao 		ring->eop_gpu_addr =
1033d0c423b6SJack Xiao 			amdgpu_mes_ctx_get_offs_gpu_addr(ring, offset);
1034d0c423b6SJack Xiao 	}
1035d0c423b6SJack Xiao 
1036d0c423b6SJack Xiao 	switch (queue_type) {
1037d0c423b6SJack Xiao 	case AMDGPU_RING_TYPE_GFX:
1038d0c423b6SJack Xiao 		ring->funcs = adev->gfx.gfx_ring[0].funcs;
1039553d2683STim Huang 		ring->me = adev->gfx.gfx_ring[0].me;
1040553d2683STim Huang 		ring->pipe = adev->gfx.gfx_ring[0].pipe;
1041d0c423b6SJack Xiao 		break;
1042d0c423b6SJack Xiao 	case AMDGPU_RING_TYPE_COMPUTE:
1043d0c423b6SJack Xiao 		ring->funcs = adev->gfx.compute_ring[0].funcs;
1044553d2683STim Huang 		ring->me = adev->gfx.compute_ring[0].me;
1045553d2683STim Huang 		ring->pipe = adev->gfx.compute_ring[0].pipe;
1046d0c423b6SJack Xiao 		break;
1047d0c423b6SJack Xiao 	case AMDGPU_RING_TYPE_SDMA:
1048d0c423b6SJack Xiao 		ring->funcs = adev->sdma.instance[0].ring.funcs;
1049d0c423b6SJack Xiao 		break;
1050d0c423b6SJack Xiao 	default:
1051d0c423b6SJack Xiao 		BUG();
1052d0c423b6SJack Xiao 	}
1053d0c423b6SJack Xiao 
1054d0c423b6SJack Xiao 	r = amdgpu_ring_init(adev, ring, 1024, NULL, 0,
1055d0c423b6SJack Xiao 			     AMDGPU_RING_PRIO_DEFAULT, NULL);
1056d0c423b6SJack Xiao 	if (r)
1057d0c423b6SJack Xiao 		goto clean_up_memory;
1058d0c423b6SJack Xiao 
1059d0c423b6SJack Xiao 	amdgpu_mes_ring_to_queue_props(adev, ring, &qprops);
1060d0c423b6SJack Xiao 
1061d0c423b6SJack Xiao 	dma_fence_wait(gang->process->vm->last_update, false);
1062d0c423b6SJack Xiao 	dma_fence_wait(ctx_data->meta_data_va->last_pt_update, false);
106318ee4ce6SJack Xiao 	amdgpu_mes_unlock(&adev->mes);
1064d0c423b6SJack Xiao 
1065d0c423b6SJack Xiao 	r = amdgpu_mes_add_hw_queue(adev, gang_id, &qprops, &queue_id);
1066d0c423b6SJack Xiao 	if (r)
1067d0c423b6SJack Xiao 		goto clean_up_ring;
1068d0c423b6SJack Xiao 
1069d0c423b6SJack Xiao 	ring->hw_queue_id = queue_id;
1070d0c423b6SJack Xiao 	ring->doorbell_index = qprops.doorbell_off;
1071d0c423b6SJack Xiao 
1072d0c423b6SJack Xiao 	if (queue_type == AMDGPU_RING_TYPE_GFX)
1073d0c423b6SJack Xiao 		sprintf(ring->name, "gfx_%d.%d.%d", pasid, gang_id, queue_id);
1074d0c423b6SJack Xiao 	else if (queue_type == AMDGPU_RING_TYPE_COMPUTE)
1075d0c423b6SJack Xiao 		sprintf(ring->name, "compute_%d.%d.%d", pasid, gang_id,
1076d0c423b6SJack Xiao 			queue_id);
1077d0c423b6SJack Xiao 	else if (queue_type == AMDGPU_RING_TYPE_SDMA)
1078d0c423b6SJack Xiao 		sprintf(ring->name, "sdma_%d.%d.%d", pasid, gang_id,
1079d0c423b6SJack Xiao 			queue_id);
1080d0c423b6SJack Xiao 	else
1081d0c423b6SJack Xiao 		BUG();
1082d0c423b6SJack Xiao 
1083d0c423b6SJack Xiao 	*out = ring;
1084d0c423b6SJack Xiao 	return 0;
1085d0c423b6SJack Xiao 
1086d0c423b6SJack Xiao clean_up_ring:
1087d0c423b6SJack Xiao 	amdgpu_ring_fini(ring);
1088d0c423b6SJack Xiao clean_up_memory:
1089d0c423b6SJack Xiao 	kfree(ring);
109018ee4ce6SJack Xiao 	amdgpu_mes_unlock(&adev->mes);
1091d0c423b6SJack Xiao 	return r;
1092d0c423b6SJack Xiao }
10939cc654c8SJack Xiao 
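/*
 * Tear down a ring created by amdgpu_mes_add_ring: unmap its MES hardware
 * queue, stop the fence fallback timer and free the ring.
 */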
10949cc654c8SJack Xiao void amdgpu_mes_remove_ring(struct amdgpu_device *adev,
10959cc654c8SJack Xiao 			    struct amdgpu_ring *ring)
10969cc654c8SJack Xiao {
10979cc654c8SJack Xiao 	if (!ring)
10989cc654c8SJack Xiao 		return;
10999cc654c8SJack Xiao 
11009cc654c8SJack Xiao 	amdgpu_mes_remove_hw_queue(adev, ring->hw_queue_id);
110139cfce75SJack Xiao 	del_timer_sync(&ring->fence_drv.fallback_timer);
11029cc654c8SJack Xiao 	amdgpu_ring_fini(ring);
11039cc654c8SJack Xiao 	kfree(ring);
11049cc654c8SJack Xiao }
1105e3652b09SJack Xiao 
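/* Return the aggregated doorbell index reserved for the given priority level. */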
11062d7a1f71SLe Ma uint32_t amdgpu_mes_get_aggregated_doorbell_index(struct amdgpu_device *adev,
11072d7a1f71SLe Ma 						   enum amdgpu_mes_priority_level prio)
11082d7a1f71SLe Ma {
11092d7a1f71SLe Ma 	return adev->mes.aggregated_doorbells[prio];
11102d7a1f71SLe Ma }
11112d7a1f71SLe Ma 
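/*
 * Allocate and zero the kernel buffer object in GTT that backs the MES
 * context meta data.
 */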
1112e3652b09SJack Xiao int amdgpu_mes_ctx_alloc_meta_data(struct amdgpu_device *adev,
1113e3652b09SJack Xiao 				   struct amdgpu_mes_ctx_data *ctx_data)
1114e3652b09SJack Xiao {
1115e3652b09SJack Xiao 	int r;
1116e3652b09SJack Xiao 
1117e3652b09SJack Xiao 	r = amdgpu_bo_create_kernel(adev,
1118e3652b09SJack Xiao 			    sizeof(struct amdgpu_mes_ctx_meta_data),
1119e3652b09SJack Xiao 			    PAGE_SIZE, AMDGPU_GEM_DOMAIN_GTT,
1120fe4e9ff9SJack Xiao 			    &ctx_data->meta_data_obj,
1121fe4e9ff9SJack Xiao 			    &ctx_data->meta_data_mc_addr,
1122e3652b09SJack Xiao 			    &ctx_data->meta_data_ptr);
11230b9ff428SLee Jones 	if (r) {
11240b9ff428SLee Jones 		dev_warn(adev->dev, "(%d) create CTX bo failed\n", r);
11250b9ff428SLee Jones 		return r;
11260b9ff428SLee Jones 	}
11270b9ff428SLee Jones 
1128e3652b09SJack Xiao 	if (!ctx_data->meta_data_obj)
1129e3652b09SJack Xiao 		return -ENOMEM;
1130e3652b09SJack Xiao 
1131e3652b09SJack Xiao 	memset(ctx_data->meta_data_ptr, 0,
1132e3652b09SJack Xiao 	       sizeof(struct amdgpu_mes_ctx_meta_data));
1133e3652b09SJack Xiao 
1134e3652b09SJack Xiao 	return 0;
1135e3652b09SJack Xiao }
1136e3652b09SJack Xiao 
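/* Free the meta data buffer object allocated by amdgpu_mes_ctx_alloc_meta_data. */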
1137e3652b09SJack Xiao void amdgpu_mes_ctx_free_meta_data(struct amdgpu_mes_ctx_data *ctx_data)
1138e3652b09SJack Xiao {
1139e3652b09SJack Xiao 	if (ctx_data->meta_data_obj)
1140fe4e9ff9SJack Xiao 		amdgpu_bo_free_kernel(&ctx_data->meta_data_obj,
1141fe4e9ff9SJack Xiao 				      &ctx_data->meta_data_mc_addr,
1142fe4e9ff9SJack Xiao 				      &ctx_data->meta_data_ptr);
1143e3652b09SJack Xiao }
1144a22f760aSJack Xiao 
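/*
 * Map the meta data buffer object into the given VM at
 * ctx_data->meta_data_gpu_addr, update the page tables and wait for the
 * updates to complete before the mapping is used.
 */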
11457c18b40eSJack Xiao int amdgpu_mes_ctx_map_meta_data(struct amdgpu_device *adev,
1146a22f760aSJack Xiao 				 struct amdgpu_vm *vm,
1147a22f760aSJack Xiao 				 struct amdgpu_mes_ctx_data *ctx_data)
1148a22f760aSJack Xiao {
11497c18b40eSJack Xiao 	struct amdgpu_bo_va *bo_va;
11507c18b40eSJack Xiao 	struct amdgpu_sync sync;
11512acc73f8SChristian König 	struct drm_exec exec;
1152a22f760aSJack Xiao 	int r;
1153a22f760aSJack Xiao 
11547c18b40eSJack Xiao 	amdgpu_sync_create(&sync);
1155a22f760aSJack Xiao 
11562acc73f8SChristian König 	drm_exec_init(&exec, 0);
11572acc73f8SChristian König 	drm_exec_until_all_locked(&exec) {
11582acc73f8SChristian König 		r = drm_exec_lock_obj(&exec,
11592acc73f8SChristian König 				      &ctx_data->meta_data_obj->tbo.base);
11602acc73f8SChristian König 		drm_exec_retry_on_contention(&exec);
11612acc73f8SChristian König 		if (unlikely(r))
11622acc73f8SChristian König 			goto error_fini_exec;
11637c18b40eSJack Xiao 
11642acc73f8SChristian König 		r = amdgpu_vm_lock_pd(vm, &exec, 0);
11652acc73f8SChristian König 		drm_exec_retry_on_contention(&exec);
11662acc73f8SChristian König 		if (unlikely(r))
11672acc73f8SChristian König 			goto error_fini_exec;
11687c18b40eSJack Xiao 	}
11697c18b40eSJack Xiao 
11707c18b40eSJack Xiao 	bo_va = amdgpu_vm_bo_add(adev, vm, ctx_data->meta_data_obj);
11717c18b40eSJack Xiao 	if (!bo_va) {
11727c18b40eSJack Xiao 		DRM_ERROR("failed to create bo_va for meta data BO\n");
11732acc73f8SChristian König 		r = -ENOMEM;
11742acc73f8SChristian König 		goto error_fini_exec;
11757c18b40eSJack Xiao 	}
11767c18b40eSJack Xiao 
11777c18b40eSJack Xiao 	r = amdgpu_vm_bo_map(adev, bo_va, ctx_data->meta_data_gpu_addr, 0,
11787c18b40eSJack Xiao 			     sizeof(struct amdgpu_mes_ctx_meta_data),
11797c18b40eSJack Xiao 			     AMDGPU_PTE_READABLE | AMDGPU_PTE_WRITEABLE |
11807c18b40eSJack Xiao 			     AMDGPU_PTE_EXECUTABLE);
11817c18b40eSJack Xiao 
11827c18b40eSJack Xiao 	if (r) {
11837c18b40eSJack Xiao 		DRM_ERROR("failed to do bo_map on meta data, err=%d\n", r);
11842acc73f8SChristian König 		goto error_del_bo_va;
11857c18b40eSJack Xiao 	}
11867c18b40eSJack Xiao 
11877c18b40eSJack Xiao 	r = amdgpu_vm_bo_update(adev, bo_va, false);
11887c18b40eSJack Xiao 	if (r) {
11897c18b40eSJack Xiao 		DRM_ERROR("failed to do vm_bo_update on meta data\n");
11902acc73f8SChristian König 		goto error_del_bo_va;
11917c18b40eSJack Xiao 	}
11927c18b40eSJack Xiao 	amdgpu_sync_fence(&sync, bo_va->last_pt_update);
1193a22f760aSJack Xiao 
1194a22f760aSJack Xiao 	r = amdgpu_vm_update_pdes(adev, vm, false);
11957c18b40eSJack Xiao 	if (r) {
11967c18b40eSJack Xiao 		DRM_ERROR("failed to update pdes on meta data\n");
11972acc73f8SChristian König 		goto error_del_bo_va;
11987c18b40eSJack Xiao 	}
11997c18b40eSJack Xiao 	amdgpu_sync_fence(&sync, vm->last_update);
1200a22f760aSJack Xiao 
12017c18b40eSJack Xiao 	amdgpu_sync_wait(&sync, false);
12022acc73f8SChristian König 	drm_exec_fini(&exec);
1203a22f760aSJack Xiao 
12047c18b40eSJack Xiao 	amdgpu_sync_free(&sync);
12057c18b40eSJack Xiao 	ctx_data->meta_data_va = bo_va;
1206a22f760aSJack Xiao 	return 0;
1207a22f760aSJack Xiao 
12082acc73f8SChristian König error_del_bo_va:
12097c18b40eSJack Xiao 	amdgpu_vm_bo_del(adev, bo_va);
12102acc73f8SChristian König 
12112acc73f8SChristian König error_fini_exec:
12122acc73f8SChristian König 	drm_exec_fini(&exec);
12137c18b40eSJack Xiao 	amdgpu_sync_free(&sync);
1214a22f760aSJack Xiao 	return r;
1215a22f760aSJack Xiao }
1216f1d93c9cSJack Xiao 
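/*
 * Undo amdgpu_mes_ctx_map_meta_data: remove the bo_va mapping, clear the
 * freed page table entries and fence the buffer object until the clear has
 * finished.
 */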
1217737dad0bSJack Xiao int amdgpu_mes_ctx_unmap_meta_data(struct amdgpu_device *adev,
1218737dad0bSJack Xiao 				   struct amdgpu_mes_ctx_data *ctx_data)
1219737dad0bSJack Xiao {
1220737dad0bSJack Xiao 	struct amdgpu_bo_va *bo_va = ctx_data->meta_data_va;
1221737dad0bSJack Xiao 	struct amdgpu_bo *bo = ctx_data->meta_data_obj;
1222737dad0bSJack Xiao 	struct amdgpu_vm *vm = bo_va->base.vm;
12232acc73f8SChristian König 	struct dma_fence *fence;
12242acc73f8SChristian König 	struct drm_exec exec;
12252acc73f8SChristian König 	long r;
1226737dad0bSJack Xiao 
12272acc73f8SChristian König 	drm_exec_init(&exec, 0);
12282acc73f8SChristian König 	drm_exec_until_all_locked(&exec) {
12292acc73f8SChristian König 		r = drm_exec_lock_obj(&exec,
12302acc73f8SChristian König 				      &ctx_data->meta_data_obj->tbo.base);
12312acc73f8SChristian König 		drm_exec_retry_on_contention(&exec);
12322acc73f8SChristian König 		if (unlikely(r))
12332acc73f8SChristian König 			goto out_unlock;
1234737dad0bSJack Xiao 
12352acc73f8SChristian König 		r = amdgpu_vm_lock_pd(vm, &exec, 0);
12362acc73f8SChristian König 		drm_exec_retry_on_contention(&exec);
12372acc73f8SChristian König 		if (unlikely(r))
12382acc73f8SChristian König 			goto out_unlock;
1239737dad0bSJack Xiao 	}
1240737dad0bSJack Xiao 
1241737dad0bSJack Xiao 	amdgpu_vm_bo_del(adev, bo_va);
1242737dad0bSJack Xiao 	if (!amdgpu_vm_ready(vm))
1243737dad0bSJack Xiao 		goto out_unlock;
1244737dad0bSJack Xiao 
12452acc73f8SChristian König 	r = dma_resv_get_singleton(bo->tbo.base.resv, DMA_RESV_USAGE_BOOKKEEP,
12462acc73f8SChristian König 				   &fence);
1247737dad0bSJack Xiao 	if (r)
1248737dad0bSJack Xiao 		goto out_unlock;
1249737dad0bSJack Xiao 	if (fence) {
1250737dad0bSJack Xiao 		amdgpu_bo_fence(bo, fence, true);
1251737dad0bSJack Xiao 		fence = NULL;
1252737dad0bSJack Xiao 	}
1253737dad0bSJack Xiao 
1254737dad0bSJack Xiao 	r = amdgpu_vm_clear_freed(adev, vm, &fence);
1255737dad0bSJack Xiao 	if (r || !fence)
1256737dad0bSJack Xiao 		goto out_unlock;
1257737dad0bSJack Xiao 
1258737dad0bSJack Xiao 	dma_fence_wait(fence, false);
1259737dad0bSJack Xiao 	amdgpu_bo_fence(bo, fence, true);
1260737dad0bSJack Xiao 	dma_fence_put(fence);
1261737dad0bSJack Xiao 
1262737dad0bSJack Xiao out_unlock:
1263737dad0bSJack Xiao 	if (unlikely(r < 0))
1264737dad0bSJack Xiao 		dev_err(adev->dev, "failed to clear page tables (%ld)\n", r);
12652acc73f8SChristian König 	drm_exec_fini(&exec);
1266737dad0bSJack Xiao 
1267737dad0bSJack Xiao 	return r;
1268737dad0bSJack Xiao }
1269737dad0bSJack Xiao 
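/*
 * Self test helper: create one gang with default priorities for the process
 * and add num_queue rings of the requested queue type to it.
 */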
1270f1d93c9cSJack Xiao static int amdgpu_mes_test_create_gang_and_queues(struct amdgpu_device *adev,
1271f1d93c9cSJack Xiao 					  int pasid, int *gang_id,
1272f1d93c9cSJack Xiao 					  int queue_type, int num_queue,
1273f1d93c9cSJack Xiao 					  struct amdgpu_ring **added_rings,
1274f1d93c9cSJack Xiao 					  struct amdgpu_mes_ctx_data *ctx_data)
1275f1d93c9cSJack Xiao {
1276f1d93c9cSJack Xiao 	struct amdgpu_ring *ring;
1277f1d93c9cSJack Xiao 	struct amdgpu_mes_gang_properties gprops = {0};
1278f1d93c9cSJack Xiao 	int r, j;
1279f1d93c9cSJack Xiao 
1280f1d93c9cSJack Xiao 	/* create a gang for the process */
1281f1d93c9cSJack Xiao 	gprops.priority = AMDGPU_MES_PRIORITY_LEVEL_NORMAL;
1282f1d93c9cSJack Xiao 	gprops.gang_quantum = adev->mes.default_gang_quantum;
1283f1d93c9cSJack Xiao 	gprops.inprocess_gang_priority = AMDGPU_MES_PRIORITY_LEVEL_NORMAL;
1284f1d93c9cSJack Xiao 	gprops.priority_level = AMDGPU_MES_PRIORITY_LEVEL_NORMAL;
1285f1d93c9cSJack Xiao 	gprops.global_priority_level = AMDGPU_MES_PRIORITY_LEVEL_NORMAL;
1286f1d93c9cSJack Xiao 
1287f1d93c9cSJack Xiao 	r = amdgpu_mes_add_gang(adev, pasid, &gprops, gang_id);
1288f1d93c9cSJack Xiao 	if (r) {
1289f1d93c9cSJack Xiao 		DRM_ERROR("failed to add gang\n");
1290f1d93c9cSJack Xiao 		return r;
1291f1d93c9cSJack Xiao 	}
1292f1d93c9cSJack Xiao 
1293f1d93c9cSJack Xiao 	/* create queues for the gang */
1294f1d93c9cSJack Xiao 	for (j = 0; j < num_queue; j++) {
1295f1d93c9cSJack Xiao 		r = amdgpu_mes_add_ring(adev, *gang_id, queue_type, j,
1296f1d93c9cSJack Xiao 					ctx_data, &ring);
1297f1d93c9cSJack Xiao 		if (r) {
1298f1d93c9cSJack Xiao 			DRM_ERROR("failed to add ring\n");
1299f1d93c9cSJack Xiao 			break;
1300f1d93c9cSJack Xiao 		}
1301f1d93c9cSJack Xiao 
1302f1d93c9cSJack Xiao 		DRM_INFO("ring %s was added\n", ring->name);
1303f1d93c9cSJack Xiao 		added_rings[j] = ring;
1304f1d93c9cSJack Xiao 	}
1305f1d93c9cSJack Xiao 
1306f1d93c9cSJack Xiao 	return 0;
1307f1d93c9cSJack Xiao }
1308cdb7476dSJack Xiao 
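/* Self test helper: run the ring and IB tests on every ring that was added. */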
1309cdb7476dSJack Xiao static int amdgpu_mes_test_queues(struct amdgpu_ring **added_rings)
1310cdb7476dSJack Xiao {
1311cdb7476dSJack Xiao 	struct amdgpu_ring *ring;
1312cdb7476dSJack Xiao 	int i, r;
1313cdb7476dSJack Xiao 
1314cdb7476dSJack Xiao 	for (i = 0; i < AMDGPU_MES_CTX_MAX_RINGS; i++) {
1315cdb7476dSJack Xiao 		ring = added_rings[i];
1316cdb7476dSJack Xiao 		if (!ring)
1317cdb7476dSJack Xiao 			continue;
1318cdb7476dSJack Xiao 
131993ab59acSGuchun Chen 		r = amdgpu_ring_test_helper(ring);
132093ab59acSGuchun Chen 		if (r)
1321cdb7476dSJack Xiao 			return r;
1322cdb7476dSJack Xiao 
1323cdb7476dSJack Xiao 		r = amdgpu_ring_test_ib(ring, 1000 * 10);
1324cdb7476dSJack Xiao 		if (r) {
1325cdb7476dSJack Xiao 			DRM_DEV_ERROR(ring->adev->dev,
1326cdb7476dSJack Xiao 				      "ring %s ib test failed (%d)\n",
1327cdb7476dSJack Xiao 				      ring->name, r);
1328cdb7476dSJack Xiao 			return r;
1329cdb7476dSJack Xiao 		}
1330cdb7476dSJack Xiao 		DRM_INFO("ring %s ib test passed\n", ring->name);
1331cdb7476dSJack Xiao 	}
1332cdb7476dSJack Xiao 
1333cdb7476dSJack Xiao 	return 0;
1334cdb7476dSJack Xiao }
13356624d161SJack Xiao 
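/*
 * MES self test: set up a temporary VM and process, create gangs with GFX,
 * compute and SDMA queues through the MES firmware, run ring/IB tests on
 * them and tear everything down again. The outcome is only reported in the
 * log; the function always returns 0.
 */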
13366624d161SJack Xiao int amdgpu_mes_self_test(struct amdgpu_device *adev)
13376624d161SJack Xiao {
13386624d161SJack Xiao 	struct amdgpu_vm *vm = NULL;
13396624d161SJack Xiao 	struct amdgpu_mes_ctx_data ctx_data = {0};
13406624d161SJack Xiao 	struct amdgpu_ring *added_rings[AMDGPU_MES_CTX_MAX_RINGS] = { NULL };
13416624d161SJack Xiao 	int gang_ids[3] = {0};
13425ee33d90SJack Xiao 	int queue_types[][2] = { { AMDGPU_RING_TYPE_GFX, 1 },
13435ee33d90SJack Xiao 				 { AMDGPU_RING_TYPE_COMPUTE, 1 },
13445ee33d90SJack Xiao 				 { AMDGPU_RING_TYPE_SDMA, 1} };
13456624d161SJack Xiao 	int i, r, pasid, k = 0;
13466624d161SJack Xiao 
13476624d161SJack Xiao 	pasid = amdgpu_pasid_alloc(16);
13486624d161SJack Xiao 	if (pasid < 0) {
13496624d161SJack Xiao 		dev_warn(adev->dev, "No more PASIDs available!\n");
13506624d161SJack Xiao 		pasid = 0;
13516624d161SJack Xiao 	}
13526624d161SJack Xiao 
13536624d161SJack Xiao 	vm = kzalloc(sizeof(*vm), GFP_KERNEL);
13546624d161SJack Xiao 	if (!vm) {
13556624d161SJack Xiao 		r = -ENOMEM;
13566624d161SJack Xiao 		goto error_pasid;
13576624d161SJack Xiao 	}
13586624d161SJack Xiao 
13595003ca63SGuchun Chen 	r = amdgpu_vm_init(adev, vm, -1);
13606624d161SJack Xiao 	if (r) {
13616624d161SJack Xiao 		DRM_ERROR("failed to initialize vm\n");
13626624d161SJack Xiao 		goto error_pasid;
13636624d161SJack Xiao 	}
13646624d161SJack Xiao 
13656624d161SJack Xiao 	r = amdgpu_mes_ctx_alloc_meta_data(adev, &ctx_data);
13666624d161SJack Xiao 	if (r) {
13676624d161SJack Xiao 		DRM_ERROR("failed to alloc ctx meta data\n");
1368c3c48339SJianglei Nie 		goto error_fini;
13696624d161SJack Xiao 	}
13706624d161SJack Xiao 
13717c18b40eSJack Xiao 	ctx_data.meta_data_gpu_addr = AMDGPU_VA_RESERVED_SIZE;
13727c18b40eSJack Xiao 	r = amdgpu_mes_ctx_map_meta_data(adev, vm, &ctx_data);
13736624d161SJack Xiao 	if (r) {
13746624d161SJack Xiao 		DRM_ERROR("failed to map ctx meta data\n");
13756624d161SJack Xiao 		goto error_vm;
13766624d161SJack Xiao 	}
13776624d161SJack Xiao 
13786624d161SJack Xiao 	r = amdgpu_mes_create_process(adev, pasid, vm);
13796624d161SJack Xiao 	if (r) {
13806624d161SJack Xiao 		DRM_ERROR("failed to create MES process\n");
13816624d161SJack Xiao 		goto error_vm;
13826624d161SJack Xiao 	}
13836624d161SJack Xiao 
13846624d161SJack Xiao 	for (i = 0; i < ARRAY_SIZE(queue_types); i++) {
138518ee4ce6SJack Xiao 		/* On GFX v10.3, the firmware doesn't yet support mapping SDMA queues. */
138618ee4ce6SJack Xiao 		if (adev->ip_versions[GC_HWIP][0] >= IP_VERSION(10, 3, 0) &&
138718ee4ce6SJack Xiao 		    adev->ip_versions[GC_HWIP][0] < IP_VERSION(11, 0, 0) &&
138818ee4ce6SJack Xiao 		    queue_types[i][0] == AMDGPU_RING_TYPE_SDMA)
1389da1c0338SJack Xiao 			continue;
1390da1c0338SJack Xiao 
13916624d161SJack Xiao 		r = amdgpu_mes_test_create_gang_and_queues(adev, pasid,
13926624d161SJack Xiao 							   &gang_ids[i],
13936624d161SJack Xiao 							   queue_types[i][0],
13946624d161SJack Xiao 							   queue_types[i][1],
13956624d161SJack Xiao 							   &added_rings[k],
13966624d161SJack Xiao 							   &ctx_data);
13976624d161SJack Xiao 		if (r)
13986624d161SJack Xiao 			goto error_queues;
13996624d161SJack Xiao 
14006624d161SJack Xiao 		k += queue_types[i][1];
14016624d161SJack Xiao 	}
14026624d161SJack Xiao 
14036624d161SJack Xiao 	/* start ring test and ib test for MES queues */
14046624d161SJack Xiao 	amdgpu_mes_test_queues(added_rings);
14056624d161SJack Xiao 
14066624d161SJack Xiao error_queues:
14076624d161SJack Xiao 	/* remove all queues */
14086624d161SJack Xiao 	for (i = 0; i < ARRAY_SIZE(added_rings); i++) {
14096624d161SJack Xiao 		if (!added_rings[i])
14106624d161SJack Xiao 			continue;
14116624d161SJack Xiao 		amdgpu_mes_remove_ring(adev, added_rings[i]);
14126624d161SJack Xiao 	}
14136624d161SJack Xiao 
14146624d161SJack Xiao 	for (i = 0; i < ARRAY_SIZE(gang_ids); i++) {
14156624d161SJack Xiao 		if (!gang_ids[i])
14166624d161SJack Xiao 			continue;
14176624d161SJack Xiao 		amdgpu_mes_remove_gang(adev, gang_ids[i]);
14186624d161SJack Xiao 	}
14196624d161SJack Xiao 
14206624d161SJack Xiao 	amdgpu_mes_destroy_process(adev, pasid);
14216624d161SJack Xiao 
14226624d161SJack Xiao error_vm:
1423737dad0bSJack Xiao 	amdgpu_mes_ctx_unmap_meta_data(adev, &ctx_data);
1424c3c48339SJianglei Nie 
1425c3c48339SJianglei Nie error_fini:
14266624d161SJack Xiao 	amdgpu_vm_fini(adev, vm);
14276624d161SJack Xiao 
14286624d161SJack Xiao error_pasid:
14296624d161SJack Xiao 	if (pasid)
14306624d161SJack Xiao 		amdgpu_pasid_free(pasid);
14316624d161SJack Xiao 
14326624d161SJack Xiao 	amdgpu_mes_ctx_free_meta_data(&ctx_data);
14336624d161SJack Xiao 	kfree(vm);
14346624d161SJack Xiao 	return 0;
14356624d161SJack Xiao }
1436cc42e76eSMario Limonciello 
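/*
 * Request the MES firmware image for the given pipe, read the ucode and data
 * start addresses from its header and, when front-door loading through the
 * PSP is used, register the ucode and data sections with the firmware
 * framework.
 */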
1437cc42e76eSMario Limonciello int amdgpu_mes_init_microcode(struct amdgpu_device *adev, int pipe)
1438cc42e76eSMario Limonciello {
1439cc42e76eSMario Limonciello 	const struct mes_firmware_header_v1_0 *mes_hdr;
1440cc42e76eSMario Limonciello 	struct amdgpu_firmware_info *info;
1441cc42e76eSMario Limonciello 	char ucode_prefix[30];
1442cc42e76eSMario Limonciello 	char fw_name[40];
144397998b89SJack Xiao 	bool need_retry = false;
1444cc42e76eSMario Limonciello 	int r;
1445cc42e76eSMario Limonciello 
144697998b89SJack Xiao 	amdgpu_ucode_ip_version_decode(adev, GC_HWIP, ucode_prefix,
144797998b89SJack Xiao 				       sizeof(ucode_prefix));
144897998b89SJack Xiao 	if (adev->ip_versions[GC_HWIP][0] >= IP_VERSION(11, 0, 0)) {
144997998b89SJack Xiao 		snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mes%s.bin",
145097998b89SJack Xiao 			 ucode_prefix,
145197998b89SJack Xiao 			 pipe == AMDGPU_MES_SCHED_PIPE ? "_2" : "1");
145297998b89SJack Xiao 		need_retry = true;
145397998b89SJack Xiao 	} else {
1454cc42e76eSMario Limonciello 		snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mes%s.bin",
1455cc42e76eSMario Limonciello 			 ucode_prefix,
1456cc42e76eSMario Limonciello 			 pipe == AMDGPU_MES_SCHED_PIPE ? "" : "1");
145797998b89SJack Xiao 	}
145897998b89SJack Xiao 
145911e0b006SMario Limonciello 	r = amdgpu_ucode_request(adev, &adev->mes.fw[pipe], fw_name);
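	/* if the scheduler pipe image is missing, fall back to the plain <prefix>_mes.bin */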
146097998b89SJack Xiao 	if (r && need_retry && pipe == AMDGPU_MES_SCHED_PIPE) {
146197998b89SJack Xiao 		snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mes.bin",
146297998b89SJack Xiao 			 ucode_prefix);
146397998b89SJack Xiao 		DRM_INFO("try to fall back to %s\n", fw_name);
146497998b89SJack Xiao 		r = amdgpu_ucode_request(adev, &adev->mes.fw[pipe],
146597998b89SJack Xiao 					 fw_name);
146697998b89SJack Xiao 	}
146797998b89SJack Xiao 
1468cc42e76eSMario Limonciello 	if (r)
1469cc42e76eSMario Limonciello 		goto out;
1470cc42e76eSMario Limonciello 
1471cc42e76eSMario Limonciello 	mes_hdr = (const struct mes_firmware_header_v1_0 *)
1472cc42e76eSMario Limonciello 		adev->mes.fw[pipe]->data;
1473cc42e76eSMario Limonciello 	adev->mes.uc_start_addr[pipe] =
1474cc42e76eSMario Limonciello 		le32_to_cpu(mes_hdr->mes_uc_start_addr_lo) |
1475cc42e76eSMario Limonciello 		((uint64_t)(le32_to_cpu(mes_hdr->mes_uc_start_addr_hi)) << 32);
1476cc42e76eSMario Limonciello 	adev->mes.data_start_addr[pipe] =
1477cc42e76eSMario Limonciello 		le32_to_cpu(mes_hdr->mes_data_start_addr_lo) |
1478cc42e76eSMario Limonciello 		((uint64_t)(le32_to_cpu(mes_hdr->mes_data_start_addr_hi)) << 32);
1479cc42e76eSMario Limonciello 
1480cc42e76eSMario Limonciello 	if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
1481cc42e76eSMario Limonciello 		int ucode, ucode_data;
1482cc42e76eSMario Limonciello 
1483cc42e76eSMario Limonciello 		if (pipe == AMDGPU_MES_SCHED_PIPE) {
1484cc42e76eSMario Limonciello 			ucode = AMDGPU_UCODE_ID_CP_MES;
1485cc42e76eSMario Limonciello 			ucode_data = AMDGPU_UCODE_ID_CP_MES_DATA;
1486cc42e76eSMario Limonciello 		} else {
1487cc42e76eSMario Limonciello 			ucode = AMDGPU_UCODE_ID_CP_MES1;
1488cc42e76eSMario Limonciello 			ucode_data = AMDGPU_UCODE_ID_CP_MES1_DATA;
1489cc42e76eSMario Limonciello 		}
1490cc42e76eSMario Limonciello 
1491cc42e76eSMario Limonciello 		info = &adev->firmware.ucode[ucode];
1492cc42e76eSMario Limonciello 		info->ucode_id = ucode;
1493cc42e76eSMario Limonciello 		info->fw = adev->mes.fw[pipe];
1494cc42e76eSMario Limonciello 		adev->firmware.fw_size +=
1495cc42e76eSMario Limonciello 			ALIGN(le32_to_cpu(mes_hdr->mes_ucode_size_bytes),
1496cc42e76eSMario Limonciello 			      PAGE_SIZE);
1497cc42e76eSMario Limonciello 
1498cc42e76eSMario Limonciello 		info = &adev->firmware.ucode[ucode_data];
1499cc42e76eSMario Limonciello 		info->ucode_id = ucode_data;
1500cc42e76eSMario Limonciello 		info->fw = adev->mes.fw[pipe];
1501cc42e76eSMario Limonciello 		adev->firmware.fw_size +=
1502cc42e76eSMario Limonciello 			ALIGN(le32_to_cpu(mes_hdr->mes_ucode_data_size_bytes),
1503cc42e76eSMario Limonciello 			      PAGE_SIZE);
1504cc42e76eSMario Limonciello 	}
1505cc42e76eSMario Limonciello 
1506cc42e76eSMario Limonciello 	return 0;
1507cc42e76eSMario Limonciello out:
150811e0b006SMario Limonciello 	amdgpu_ucode_release(&adev->mes.fw[pipe]);
1509cc42e76eSMario Limonciello 	return r;
1510cc42e76eSMario Limonciello }
1511