xref: /openbmc/linux/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c (revision 8f8d5745bb520c76b81abef4a2cb3023d0313bfd)
1 /*
2  * Copyright 2014 Advanced Micro Devices, Inc.
3  * Copyright 2008 Red Hat Inc.
4  * Copyright 2009 Jerome Glisse.
5  *
6  * Permission is hereby granted, free of charge, to any person obtaining a
7  * copy of this software and associated documentation files (the "Software"),
8  * to deal in the Software without restriction, including without limitation
9  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
10  * and/or sell copies of the Software, and to permit persons to whom the
11  * Software is furnished to do so, subject to the following conditions:
12  *
13  * The above copyright notice and this permission notice shall be included in
14  * all copies or substantial portions of the Software.
15  *
16  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
19  * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
20  * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
21  * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
22  * OTHER DEALINGS IN THE SOFTWARE.
23  *
24  */
25 #include <drm/drmP.h>
26 #include "amdgpu.h"
27 #include "amdgpu_gfx.h"
28 #include "amdgpu_rlc.h"
29 
/* Delay before the gfx off feature is enabled: 100 ms (0.1 s), in jiffies */
#define GFX_OFF_DELAY_ENABLE         msecs_to_jiffies(100)
32 
33 /*
 * GPU GFX IP block helper functions.
35  */
36 
37 int amdgpu_gfx_queue_to_bit(struct amdgpu_device *adev, int mec,
38 			    int pipe, int queue)
39 {
40 	int bit = 0;
41 
42 	bit += mec * adev->gfx.mec.num_pipe_per_mec
43 		* adev->gfx.mec.num_queue_per_pipe;
44 	bit += pipe * adev->gfx.mec.num_queue_per_pipe;
45 	bit += queue;
46 
47 	return bit;
48 }
49 
50 void amdgpu_gfx_bit_to_queue(struct amdgpu_device *adev, int bit,
51 			     int *mec, int *pipe, int *queue)
52 {
53 	*queue = bit % adev->gfx.mec.num_queue_per_pipe;
54 	*pipe = (bit / adev->gfx.mec.num_queue_per_pipe)
55 		% adev->gfx.mec.num_pipe_per_mec;
56 	*mec = (bit / adev->gfx.mec.num_queue_per_pipe)
57 	       / adev->gfx.mec.num_pipe_per_mec;
58 
59 }
60 
61 bool amdgpu_gfx_is_mec_queue_enabled(struct amdgpu_device *adev,
62 				     int mec, int pipe, int queue)
63 {
64 	return test_bit(amdgpu_gfx_queue_to_bit(adev, mec, pipe, queue),
65 			adev->gfx.mec.queue_bitmap);
66 }
67 
68 /**
69  * amdgpu_gfx_scratch_get - Allocate a scratch register
70  *
71  * @adev: amdgpu_device pointer
72  * @reg: scratch register mmio offset
73  *
74  * Allocate a CP scratch register for use by the driver (all asics).
75  * Returns 0 on success or -EINVAL on failure.
76  */
77 int amdgpu_gfx_scratch_get(struct amdgpu_device *adev, uint32_t *reg)
78 {
79 	int i;
80 
81 	i = ffs(adev->gfx.scratch.free_mask);
82 	if (i != 0 && i <= adev->gfx.scratch.num_reg) {
83 		i--;
84 		adev->gfx.scratch.free_mask &= ~(1u << i);
85 		*reg = adev->gfx.scratch.reg_base + i;
86 		return 0;
87 	}
88 	return -EINVAL;
89 }
90 
91 /**
92  * amdgpu_gfx_scratch_free - Free a scratch register
93  *
94  * @adev: amdgpu_device pointer
95  * @reg: scratch register mmio offset
96  *
97  * Free a CP scratch register allocated for use by the driver (all asics)
98  */
99 void amdgpu_gfx_scratch_free(struct amdgpu_device *adev, uint32_t reg)
100 {
101 	adev->gfx.scratch.free_mask |= 1u << (reg - adev->gfx.scratch.reg_base);
102 }
103 
104 /**
105  * amdgpu_gfx_parse_disable_cu - Parse the disable_cu module parameter
106  *
107  * @mask: array in which the per-shader array disable masks will be stored
108  * @max_se: number of SEs
109  * @max_sh: number of SHs
110  *
111  * The bitmask of CUs to be disabled in the shader array determined by se and
112  * sh is stored in mask[se * max_sh + sh].
113  */
114 void amdgpu_gfx_parse_disable_cu(unsigned *mask, unsigned max_se, unsigned max_sh)
115 {
116 	unsigned se, sh, cu;
117 	const char *p;
118 
119 	memset(mask, 0, sizeof(*mask) * max_se * max_sh);
120 
121 	if (!amdgpu_disable_cu || !*amdgpu_disable_cu)
122 		return;
123 
124 	p = amdgpu_disable_cu;
125 	for (;;) {
126 		char *next;
127 		int ret = sscanf(p, "%u.%u.%u", &se, &sh, &cu);
128 		if (ret < 3) {
129 			DRM_ERROR("amdgpu: could not parse disable_cu\n");
130 			return;
131 		}
132 
133 		if (se < max_se && sh < max_sh && cu < 16) {
134 			DRM_INFO("amdgpu: disabling CU %u.%u.%u\n", se, sh, cu);
135 			mask[se * max_sh + sh] |= 1u << cu;
136 		} else {
137 			DRM_ERROR("amdgpu: disable_cu %u.%u.%u is out of range\n",
138 				  se, sh, cu);
139 		}
140 
141 		next = strchr(p, ',');
142 		if (!next)
143 			break;
144 		p = next + 1;
145 	}
146 }
147 
148 static bool amdgpu_gfx_is_multipipe_capable(struct amdgpu_device *adev)
149 {
150 	if (amdgpu_compute_multipipe != -1) {
151 		DRM_INFO("amdgpu: forcing compute pipe policy %d\n",
152 			 amdgpu_compute_multipipe);
153 		return amdgpu_compute_multipipe == 1;
154 	}
155 
156 	/* FIXME: spreading the queues across pipes causes perf regressions
157 	 * on POLARIS11 compute workloads */
158 	if (adev->asic_type == CHIP_POLARIS11)
159 		return false;
160 
161 	return adev->gfx.mec.num_mec > 1;
162 }
163 
164 void amdgpu_gfx_compute_queue_acquire(struct amdgpu_device *adev)
165 {
166 	int i, queue, pipe, mec;
167 	bool multipipe_policy = amdgpu_gfx_is_multipipe_capable(adev);
168 
169 	/* policy for amdgpu compute queue ownership */
170 	for (i = 0; i < AMDGPU_MAX_COMPUTE_QUEUES; ++i) {
171 		queue = i % adev->gfx.mec.num_queue_per_pipe;
172 		pipe = (i / adev->gfx.mec.num_queue_per_pipe)
173 			% adev->gfx.mec.num_pipe_per_mec;
174 		mec = (i / adev->gfx.mec.num_queue_per_pipe)
175 			/ adev->gfx.mec.num_pipe_per_mec;
176 
177 		/* we've run out of HW */
178 		if (mec >= adev->gfx.mec.num_mec)
179 			break;
180 
181 		if (multipipe_policy) {
182 			/* policy: amdgpu owns the first two queues of the first MEC */
183 			if (mec == 0 && queue < 2)
184 				set_bit(i, adev->gfx.mec.queue_bitmap);
185 		} else {
186 			/* policy: amdgpu owns all queues in the first pipe */
187 			if (mec == 0 && pipe == 0)
188 				set_bit(i, adev->gfx.mec.queue_bitmap);
189 		}
190 	}
191 
192 	/* update the number of active compute rings */
193 	adev->gfx.num_compute_rings =
194 		bitmap_weight(adev->gfx.mec.queue_bitmap, AMDGPU_MAX_COMPUTE_QUEUES);
195 
196 	/* If you hit this case and edited the policy, you probably just
197 	 * need to increase AMDGPU_MAX_COMPUTE_RINGS */
198 	if (WARN_ON(adev->gfx.num_compute_rings > AMDGPU_MAX_COMPUTE_RINGS))
199 		adev->gfx.num_compute_rings = AMDGPU_MAX_COMPUTE_RINGS;
200 }
201 
202 static int amdgpu_gfx_kiq_acquire(struct amdgpu_device *adev,
203 				  struct amdgpu_ring *ring)
204 {
205 	int queue_bit;
206 	int mec, pipe, queue;
207 
208 	queue_bit = adev->gfx.mec.num_mec
209 		    * adev->gfx.mec.num_pipe_per_mec
210 		    * adev->gfx.mec.num_queue_per_pipe;
211 
212 	while (queue_bit-- >= 0) {
213 		if (test_bit(queue_bit, adev->gfx.mec.queue_bitmap))
214 			continue;
215 
216 		amdgpu_gfx_bit_to_queue(adev, queue_bit, &mec, &pipe, &queue);
217 
218 		/*
219 		 * 1. Using pipes 2/3 from MEC 2 seems cause problems.
220 		 * 2. It must use queue id 0, because CGPG_IDLE/SAVE/LOAD/RUN
221 		 * only can be issued on queue 0.
222 		 */
223 		if ((mec == 1 && pipe > 1) || queue != 0)
224 			continue;
225 
226 		ring->me = mec + 1;
227 		ring->pipe = pipe;
228 		ring->queue = queue;
229 
230 		return 0;
231 	}
232 
233 	dev_err(adev->dev, "Failed to find a queue for KIQ\n");
234 	return -EINVAL;
235 }
236 
237 int amdgpu_gfx_kiq_init_ring(struct amdgpu_device *adev,
238 			     struct amdgpu_ring *ring,
239 			     struct amdgpu_irq_src *irq)
240 {
241 	struct amdgpu_kiq *kiq = &adev->gfx.kiq;
242 	int r = 0;
243 
244 	spin_lock_init(&kiq->ring_lock);
245 
246 	r = amdgpu_device_wb_get(adev, &adev->virt.reg_val_offs);
247 	if (r)
248 		return r;
249 
250 	ring->adev = NULL;
251 	ring->ring_obj = NULL;
252 	ring->use_doorbell = true;
253 	ring->doorbell_index = adev->doorbell_index.kiq;
254 
255 	r = amdgpu_gfx_kiq_acquire(adev, ring);
256 	if (r)
257 		return r;
258 
259 	ring->eop_gpu_addr = kiq->eop_gpu_addr;
260 	sprintf(ring->name, "kiq_%d.%d.%d", ring->me, ring->pipe, ring->queue);
261 	r = amdgpu_ring_init(adev, ring, 1024,
262 			     irq, AMDGPU_CP_KIQ_IRQ_DRIVER0);
263 	if (r)
264 		dev_warn(adev->dev, "(%d) failed to init kiq ring\n", r);
265 
266 	return r;
267 }
268 
269 void amdgpu_gfx_kiq_free_ring(struct amdgpu_ring *ring,
270 			      struct amdgpu_irq_src *irq)
271 {
272 	amdgpu_device_wb_free(ring->adev, ring->adev->virt.reg_val_offs);
273 	amdgpu_ring_fini(ring);
274 }
275 
276 void amdgpu_gfx_kiq_fini(struct amdgpu_device *adev)
277 {
278 	struct amdgpu_kiq *kiq = &adev->gfx.kiq;
279 
280 	amdgpu_bo_free_kernel(&kiq->eop_obj, &kiq->eop_gpu_addr, NULL);
281 }
282 
283 int amdgpu_gfx_kiq_init(struct amdgpu_device *adev,
284 			unsigned hpd_size)
285 {
286 	int r;
287 	u32 *hpd;
288 	struct amdgpu_kiq *kiq = &adev->gfx.kiq;
289 
290 	r = amdgpu_bo_create_kernel(adev, hpd_size, PAGE_SIZE,
291 				    AMDGPU_GEM_DOMAIN_GTT, &kiq->eop_obj,
292 				    &kiq->eop_gpu_addr, (void **)&hpd);
293 	if (r) {
294 		dev_warn(adev->dev, "failed to create KIQ bo (%d).\n", r);
295 		return r;
296 	}
297 
298 	memset(hpd, 0, hpd_size);
299 
300 	r = amdgpu_bo_reserve(kiq->eop_obj, true);
301 	if (unlikely(r != 0))
302 		dev_warn(adev->dev, "(%d) reserve kiq eop bo failed\n", r);
303 	amdgpu_bo_kunmap(kiq->eop_obj);
304 	amdgpu_bo_unreserve(kiq->eop_obj);
305 
306 	return 0;
307 }
308 
309 /* create MQD for each compute queue */
310 int amdgpu_gfx_compute_mqd_sw_init(struct amdgpu_device *adev,
311 				   unsigned mqd_size)
312 {
313 	struct amdgpu_ring *ring = NULL;
314 	int r, i;
315 
316 	/* create MQD for KIQ */
317 	ring = &adev->gfx.kiq.ring;
318 	if (!ring->mqd_obj) {
319 		/* originaly the KIQ MQD is put in GTT domain, but for SRIOV VRAM domain is a must
320 		 * otherwise hypervisor trigger SAVE_VF fail after driver unloaded which mean MQD
321 		 * deallocated and gart_unbind, to strict diverage we decide to use VRAM domain for
322 		 * KIQ MQD no matter SRIOV or Bare-metal
323 		 */
324 		r = amdgpu_bo_create_kernel(adev, mqd_size, PAGE_SIZE,
325 					    AMDGPU_GEM_DOMAIN_VRAM, &ring->mqd_obj,
326 					    &ring->mqd_gpu_addr, &ring->mqd_ptr);
327 		if (r) {
328 			dev_warn(adev->dev, "failed to create ring mqd ob (%d)", r);
329 			return r;
330 		}
331 
332 		/* prepare MQD backup */
333 		adev->gfx.mec.mqd_backup[AMDGPU_MAX_COMPUTE_RINGS] = kmalloc(mqd_size, GFP_KERNEL);
334 		if (!adev->gfx.mec.mqd_backup[AMDGPU_MAX_COMPUTE_RINGS])
335 				dev_warn(adev->dev, "no memory to create MQD backup for ring %s\n", ring->name);
336 	}
337 
338 	/* create MQD for each KCQ */
339 	for (i = 0; i < adev->gfx.num_compute_rings; i++) {
340 		ring = &adev->gfx.compute_ring[i];
341 		if (!ring->mqd_obj) {
342 			r = amdgpu_bo_create_kernel(adev, mqd_size, PAGE_SIZE,
343 						    AMDGPU_GEM_DOMAIN_GTT, &ring->mqd_obj,
344 						    &ring->mqd_gpu_addr, &ring->mqd_ptr);
345 			if (r) {
346 				dev_warn(adev->dev, "failed to create ring mqd ob (%d)", r);
347 				return r;
348 			}
349 
350 			/* prepare MQD backup */
351 			adev->gfx.mec.mqd_backup[i] = kmalloc(mqd_size, GFP_KERNEL);
352 			if (!adev->gfx.mec.mqd_backup[i])
353 				dev_warn(adev->dev, "no memory to create MQD backup for ring %s\n", ring->name);
354 		}
355 	}
356 
357 	return 0;
358 }
359 
360 void amdgpu_gfx_compute_mqd_sw_fini(struct amdgpu_device *adev)
361 {
362 	struct amdgpu_ring *ring = NULL;
363 	int i;
364 
365 	for (i = 0; i < adev->gfx.num_compute_rings; i++) {
366 		ring = &adev->gfx.compute_ring[i];
367 		kfree(adev->gfx.mec.mqd_backup[i]);
368 		amdgpu_bo_free_kernel(&ring->mqd_obj,
369 				      &ring->mqd_gpu_addr,
370 				      &ring->mqd_ptr);
371 	}
372 
373 	ring = &adev->gfx.kiq.ring;
374 	kfree(adev->gfx.mec.mqd_backup[AMDGPU_MAX_COMPUTE_RINGS]);
375 	amdgpu_bo_free_kernel(&ring->mqd_obj,
376 			      &ring->mqd_gpu_addr,
377 			      &ring->mqd_ptr);
378 }
379 
380 /* amdgpu_gfx_off_ctrl - Handle gfx off feature enable/disable
381  *
382  * @adev: amdgpu_device pointer
383  * @bool enable true: enable gfx off feature, false: disable gfx off feature
384  *
385  * 1. gfx off feature will be enabled by gfx ip after gfx cg gp enabled.
386  * 2. other client can send request to disable gfx off feature, the request should be honored.
387  * 3. other client can cancel their request of disable gfx off feature
388  * 4. other client should not send request to enable gfx off feature before disable gfx off feature.
389  */
390 
391 void amdgpu_gfx_off_ctrl(struct amdgpu_device *adev, bool enable)
392 {
393 	if (!(adev->powerplay.pp_feature & PP_GFXOFF_MASK))
394 		return;
395 
396 	if (!adev->powerplay.pp_funcs || !adev->powerplay.pp_funcs->set_powergating_by_smu)
397 		return;
398 
399 
400 	mutex_lock(&adev->gfx.gfx_off_mutex);
401 
402 	if (!enable)
403 		adev->gfx.gfx_off_req_count++;
404 	else if (adev->gfx.gfx_off_req_count > 0)
405 		adev->gfx.gfx_off_req_count--;
406 
407 	if (enable && !adev->gfx.gfx_off_state && !adev->gfx.gfx_off_req_count) {
408 		schedule_delayed_work(&adev->gfx.gfx_off_delay_work, GFX_OFF_DELAY_ENABLE);
409 	} else if (!enable && adev->gfx.gfx_off_state) {
410 		if (!amdgpu_dpm_set_powergating_by_smu(adev, AMD_IP_BLOCK_TYPE_GFX, false))
411 			adev->gfx.gfx_off_state = false;
412 	}
413 
414 	mutex_unlock(&adev->gfx.gfx_off_mutex);
415 }
416