/*
 * Copyright 2015 Advanced Micro Devices, Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 *
 * Authors: monk liu <monk.liu@amd.com>
 */

#include <drm/drm_auth.h>
#include <drm/drm_drv.h>
#include "amdgpu.h"
#include "amdgpu_sched.h"
#include "amdgpu_ras.h"
#include <linux/nospec.h>

#define to_amdgpu_ctx_entity(e)	\
	container_of((e), struct amdgpu_ctx_entity, entity)

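/* Number of scheduler entities each context exposes per HW IP type */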
const unsigned int amdgpu_ctx_num_entities[AMDGPU_HW_IP_NUM] = {
	[AMDGPU_HW_IP_GFX]	=	1,
	[AMDGPU_HW_IP_COMPUTE]	=	4,
	[AMDGPU_HW_IP_DMA]	=	2,
	[AMDGPU_HW_IP_UVD]	=	1,
	[AMDGPU_HW_IP_VCE]	=	1,
	[AMDGPU_HW_IP_UVD_ENC]	=	1,
	[AMDGPU_HW_IP_VCN_DEC]	=	1,
	[AMDGPU_HW_IP_VCN_ENC]	=	1,
	[AMDGPU_HW_IP_VCN_JPEG]	=	1,
};

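/* Check whether a context priority value coming from userspace is one we know */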
bool amdgpu_ctx_priority_is_valid(int32_t ctx_prio)
{
	switch (ctx_prio) {
	case AMDGPU_CTX_PRIORITY_UNSET:
	case AMDGPU_CTX_PRIORITY_VERY_LOW:
	case AMDGPU_CTX_PRIORITY_LOW:
	case AMDGPU_CTX_PRIORITY_NORMAL:
	case AMDGPU_CTX_PRIORITY_HIGH:
	case AMDGPU_CTX_PRIORITY_VERY_HIGH:
		return true;
	default:
		return false;
	}
}

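/*
 * Map a userspace context priority to a DRM scheduler priority. The DRM
 * scheduler has fewer levels, so VERY_LOW/LOW both map to MIN and
 * HIGH/VERY_HIGH both map to HIGH.
 */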
static enum drm_sched_priority
amdgpu_ctx_to_drm_sched_prio(int32_t ctx_prio)
{
	switch (ctx_prio) {
	case AMDGPU_CTX_PRIORITY_UNSET:
		return DRM_SCHED_PRIORITY_UNSET;

	case AMDGPU_CTX_PRIORITY_VERY_LOW:
	case AMDGPU_CTX_PRIORITY_LOW:
		return DRM_SCHED_PRIORITY_MIN;

	case AMDGPU_CTX_PRIORITY_NORMAL:
		return DRM_SCHED_PRIORITY_NORMAL;

	case AMDGPU_CTX_PRIORITY_HIGH:
	case AMDGPU_CTX_PRIORITY_VERY_HIGH:
		return DRM_SCHED_PRIORITY_HIGH;

	/* This should never happen as we already sanitized the
	 * userspace-provided priority; WARN if it does.
	 */
	default:
		WARN(1, "Invalid context priority %d\n", ctx_prio);
		return DRM_SCHED_PRIORITY_NORMAL;
	}
}

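/*
 * Above-NORMAL priorities are restricted: the caller must either hold
 * CAP_SYS_NICE or be the current DRM master of the device.
 */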
static int amdgpu_ctx_priority_permit(struct drm_file *filp,
				      int32_t priority)
{
	if (!amdgpu_ctx_priority_is_valid(priority))
		return -EINVAL;

	/* NORMAL and below are accessible by everyone */
	if (priority <= AMDGPU_CTX_PRIORITY_NORMAL)
		return 0;

	if (capable(CAP_SYS_NICE))
		return 0;

	if (drm_is_current_master(filp))
		return 0;

	return -EACCES;
}

static enum amdgpu_gfx_pipe_priority amdgpu_ctx_prio_to_gfx_pipe_prio(int32_t prio)
{
	switch (prio) {
	case AMDGPU_CTX_PRIORITY_HIGH:
	case AMDGPU_CTX_PRIORITY_VERY_HIGH:
		return AMDGPU_GFX_PIPE_PRIO_HIGH;
	default:
		return AMDGPU_GFX_PIPE_PRIO_NORMAL;
	}
}

static enum amdgpu_ring_priority_level amdgpu_ctx_sched_prio_to_ring_prio(int32_t prio)
{
	switch (prio) {
	case AMDGPU_CTX_PRIORITY_HIGH:
		return AMDGPU_RING_PRIO_1;
	case AMDGPU_CTX_PRIORITY_VERY_HIGH:
		return AMDGPU_RING_PRIO_2;
	default:
		return AMDGPU_RING_PRIO_0;
	}
}

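/*
 * Resolve the effective context priority (the override wins over the init
 * priority) into a hardware queue priority for the given HW IP. Falls back
 * to the default ring priority if no scheduler is set up for that level.
 */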
static unsigned int amdgpu_ctx_get_hw_prio(struct amdgpu_ctx *ctx, u32 hw_ip)
{
	struct amdgpu_device *adev = ctx->mgr->adev;
	unsigned int hw_prio;
	int32_t ctx_prio;

	ctx_prio = (ctx->override_priority == AMDGPU_CTX_PRIORITY_UNSET) ?
			ctx->init_priority : ctx->override_priority;

	switch (hw_ip) {
	case AMDGPU_HW_IP_GFX:
	case AMDGPU_HW_IP_COMPUTE:
		hw_prio = amdgpu_ctx_prio_to_gfx_pipe_prio(ctx_prio);
		break;
	case AMDGPU_HW_IP_VCE:
	case AMDGPU_HW_IP_VCN_ENC:
		hw_prio = amdgpu_ctx_sched_prio_to_ring_prio(ctx_prio);
		break;
	default:
		hw_prio = AMDGPU_RING_PRIO_DEFAULT;
		break;
	}

	hw_ip = array_index_nospec(hw_ip, AMDGPU_HW_IP_NUM);
	if (adev->gpu_sched[hw_ip][hw_prio].num_scheds == 0)
		hw_prio = AMDGPU_RING_PRIO_DEFAULT;

	return hw_prio;
}

/* Calculate the time spent on the hardware */
static ktime_t amdgpu_ctx_fence_time(struct dma_fence *fence)
{
	struct drm_sched_fence *s_fence;

	if (!fence)
		return ns_to_ktime(0);

	/* When the fence was never even scheduled it can't have consumed time */
	s_fence = to_drm_sched_fence(fence);
	if (!test_bit(DMA_FENCE_FLAG_TIMESTAMP_BIT, &s_fence->scheduled.flags))
		return ns_to_ktime(0);

	/* When it is still running, account for the time spent so far */
	if (!test_bit(DMA_FENCE_FLAG_TIMESTAMP_BIT, &s_fence->finished.flags))
		return ktime_sub(ktime_get(), s_fence->scheduled.timestamp);

	return ktime_sub(s_fence->finished.timestamp,
			 s_fence->scheduled.timestamp);
}

static ktime_t amdgpu_ctx_entity_time(struct amdgpu_ctx *ctx,
				      struct amdgpu_ctx_entity *centity)
{
	ktime_t res = ns_to_ktime(0);
	uint32_t i;

	spin_lock(&ctx->ring_lock);
	for (i = 0; i < amdgpu_sched_jobs; i++)
		res = ktime_add(res, amdgpu_ctx_fence_time(centity->fences[i]));
	spin_unlock(&ctx->ring_lock);
	return res;
}

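/*
 * Lazily create the scheduler entity for one ring of a HW IP. This is called
 * the first time userspace submits to that ring; losing the cmpxchg race
 * against a concurrent creator is not an error.
 */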
static int amdgpu_ctx_init_entity(struct amdgpu_ctx *ctx, u32 hw_ip,
				  const u32 ring)
{
	struct drm_gpu_scheduler **scheds = NULL, *sched = NULL;
	struct amdgpu_device *adev = ctx->mgr->adev;
	struct amdgpu_ctx_entity *entity;
	enum drm_sched_priority drm_prio;
	unsigned int hw_prio, num_scheds;
	int32_t ctx_prio;
	int r;

	entity = kzalloc(struct_size(entity, fences, amdgpu_sched_jobs),
			 GFP_KERNEL);
	if (!entity)
		return -ENOMEM;

	ctx_prio = (ctx->override_priority == AMDGPU_CTX_PRIORITY_UNSET) ?
			ctx->init_priority : ctx->override_priority;
	entity->hw_ip = hw_ip;
	entity->sequence = 1;
	hw_prio = amdgpu_ctx_get_hw_prio(ctx, hw_ip);
	drm_prio = amdgpu_ctx_to_drm_sched_prio(ctx_prio);

	hw_ip = array_index_nospec(hw_ip, AMDGPU_HW_IP_NUM);

	if (!adev->xcp_mgr) {
		scheds = adev->gpu_sched[hw_ip][hw_prio].sched;
		num_scheds = adev->gpu_sched[hw_ip][hw_prio].num_scheds;
	} else {
		struct amdgpu_fpriv *fpriv;

		fpriv = container_of(ctx->ctx_mgr, struct amdgpu_fpriv, ctx_mgr);
		r = amdgpu_xcp_select_scheds(adev, hw_ip, hw_prio, fpriv,
					     &num_scheds, &scheds);
		if (r)
			goto cleanup_entity;
	}

	/* disable load balance if the hw engine retains context among dependent jobs */
	if (hw_ip == AMDGPU_HW_IP_VCN_ENC ||
	    hw_ip == AMDGPU_HW_IP_VCN_DEC ||
	    hw_ip == AMDGPU_HW_IP_UVD_ENC ||
	    hw_ip == AMDGPU_HW_IP_UVD) {
		sched = drm_sched_pick_best(scheds, num_scheds);
		scheds = &sched;
		num_scheds = 1;
	}

	r = drm_sched_entity_init(&entity->entity, drm_prio, scheds, num_scheds,
				  &ctx->guilty);
	if (r)
		goto error_free_entity;

	/* It's not an error if we fail to install the new entity */
	if (cmpxchg(&ctx->entities[hw_ip][ring], NULL, entity))
		goto cleanup_entity;

	return 0;

cleanup_entity:
	drm_sched_entity_fini(&entity->entity);

error_free_entity:
	kfree(entity);

	return r;
}

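/*
 * Tear down one context entity: sum up how much HW time its remembered
 * fences consumed, drop the fence references, release the scheduler
 * selection and free the entity.
 */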
static ktime_t amdgpu_ctx_fini_entity(struct amdgpu_device *adev,
				      struct amdgpu_ctx_entity *entity)
{
	ktime_t res = ns_to_ktime(0);
	int i;

	if (!entity)
		return res;

	for (i = 0; i < amdgpu_sched_jobs; ++i) {
		res = ktime_add(res, amdgpu_ctx_fence_time(entity->fences[i]));
		dma_fence_put(entity->fences[i]);
	}

	amdgpu_xcp_release_sched(adev, entity);

	kfree(entity);
	return res;
}

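/* Translate the currently forced DPM performance level into a stable pstate value */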
static int amdgpu_ctx_get_stable_pstate(struct amdgpu_ctx *ctx,
					u32 *stable_pstate)
{
	struct amdgpu_device *adev = ctx->mgr->adev;
	enum amd_dpm_forced_level current_level;

	current_level = amdgpu_dpm_get_performance_level(adev);

	switch (current_level) {
	case AMD_DPM_FORCED_LEVEL_PROFILE_STANDARD:
		*stable_pstate = AMDGPU_CTX_STABLE_PSTATE_STANDARD;
		break;
	case AMD_DPM_FORCED_LEVEL_PROFILE_MIN_SCLK:
		*stable_pstate = AMDGPU_CTX_STABLE_PSTATE_MIN_SCLK;
		break;
	case AMD_DPM_FORCED_LEVEL_PROFILE_MIN_MCLK:
		*stable_pstate = AMDGPU_CTX_STABLE_PSTATE_MIN_MCLK;
		break;
	case AMD_DPM_FORCED_LEVEL_PROFILE_PEAK:
		*stable_pstate = AMDGPU_CTX_STABLE_PSTATE_PEAK;
		break;
	default:
		*stable_pstate = AMDGPU_CTX_STABLE_PSTATE_NONE;
		break;
	}
	return 0;
}

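/*
 * Initialize a freshly allocated context: check priority permissions, take
 * snapshots of the reset/VRAM-lost counters and inherit the current stable
 * pstate setting.
 */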
static int amdgpu_ctx_init(struct amdgpu_ctx_mgr *mgr, int32_t priority,
			   struct drm_file *filp, struct amdgpu_ctx *ctx)
{
	struct amdgpu_fpriv *fpriv = filp->driver_priv;
	u32 current_stable_pstate;
	int r;

	r = amdgpu_ctx_priority_permit(filp, priority);
	if (r)
		return r;

	memset(ctx, 0, sizeof(*ctx));

	kref_init(&ctx->refcount);
	ctx->mgr = mgr;
	spin_lock_init(&ctx->ring_lock);

	ctx->reset_counter = atomic_read(&mgr->adev->gpu_reset_counter);
	ctx->reset_counter_query = ctx->reset_counter;
	ctx->vram_lost_counter = atomic_read(&mgr->adev->vram_lost_counter);
	ctx->init_priority = priority;
	ctx->override_priority = AMDGPU_CTX_PRIORITY_UNSET;

	r = amdgpu_ctx_get_stable_pstate(ctx, &current_stable_pstate);
	if (r)
		return r;

	if (mgr->adev->pm.stable_pstate_ctx)
		ctx->stable_pstate = mgr->adev->pm.stable_pstate_ctx->stable_pstate;
	else
		ctx->stable_pstate = current_stable_pstate;

	ctx->ctx_mgr = &fpriv->ctx_mgr;
	return 0;
}

static int amdgpu_ctx_set_stable_pstate(struct amdgpu_ctx *ctx,
					u32 stable_pstate)
{
	struct amdgpu_device *adev = ctx->mgr->adev;
	enum amd_dpm_forced_level level;
	u32 current_stable_pstate;
	int r;

	mutex_lock(&adev->pm.stable_pstate_ctx_lock);
	if (adev->pm.stable_pstate_ctx && adev->pm.stable_pstate_ctx != ctx) {
		r = -EBUSY;
		goto done;
	}

	r = amdgpu_ctx_get_stable_pstate(ctx, &current_stable_pstate);
	if (r || (stable_pstate == current_stable_pstate))
		goto done;

	switch (stable_pstate) {
	case AMDGPU_CTX_STABLE_PSTATE_NONE:
		level = AMD_DPM_FORCED_LEVEL_AUTO;
		break;
	case AMDGPU_CTX_STABLE_PSTATE_STANDARD:
		level = AMD_DPM_FORCED_LEVEL_PROFILE_STANDARD;
		break;
	case AMDGPU_CTX_STABLE_PSTATE_MIN_SCLK:
		level = AMD_DPM_FORCED_LEVEL_PROFILE_MIN_SCLK;
		break;
	case AMDGPU_CTX_STABLE_PSTATE_MIN_MCLK:
		level = AMD_DPM_FORCED_LEVEL_PROFILE_MIN_MCLK;
		break;
	case AMDGPU_CTX_STABLE_PSTATE_PEAK:
		level = AMD_DPM_FORCED_LEVEL_PROFILE_PEAK;
		break;
	default:
		r = -EINVAL;
		goto done;
	}

	r = amdgpu_dpm_force_performance_level(adev, level);

	if (level == AMD_DPM_FORCED_LEVEL_AUTO)
		adev->pm.stable_pstate_ctx = NULL;
	else
		adev->pm.stable_pstate_ctx = ctx;
done:
	mutex_unlock(&adev->pm.stable_pstate_ctx_lock);

	return r;
}

static void amdgpu_ctx_fini(struct kref *ref)
{
	struct amdgpu_ctx *ctx = container_of(ref, struct amdgpu_ctx, refcount);
	struct amdgpu_ctx_mgr *mgr = ctx->mgr;
	struct amdgpu_device *adev = mgr->adev;
	unsigned int i, j, idx;

	if (!adev)
		return;

	for (i = 0; i < AMDGPU_HW_IP_NUM; ++i) {
		for (j = 0; j < AMDGPU_MAX_ENTITY_NUM; ++j) {
			ktime_t spend;

			spend = amdgpu_ctx_fini_entity(adev, ctx->entities[i][j]);
			atomic64_add(ktime_to_ns(spend), &mgr->time_spend[i]);
		}
	}

	if (drm_dev_enter(adev_to_drm(adev), &idx)) {
		amdgpu_ctx_set_stable_pstate(ctx, ctx->stable_pstate);
		drm_dev_exit(idx);
	}

	kfree(ctx);
}

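/*
 * Look up (and create on first use) the scheduler entity for a given
 * HW IP / instance / ring triple of a context.
 */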
int amdgpu_ctx_get_entity(struct amdgpu_ctx *ctx, u32 hw_ip, u32 instance,
			  u32 ring, struct drm_sched_entity **entity)
{
	int r;

	if (hw_ip >= AMDGPU_HW_IP_NUM) {
		DRM_ERROR("unknown HW IP type: %d\n", hw_ip);
		return -EINVAL;
	}

	/* Right now all IPs have only one instance - multiple rings. */
	if (instance != 0) {
		DRM_DEBUG("invalid ip instance: %d\n", instance);
		return -EINVAL;
	}

	if (ring >= amdgpu_ctx_num_entities[hw_ip]) {
		DRM_DEBUG("invalid ring: %d %d\n", hw_ip, ring);
		return -EINVAL;
	}

	if (ctx->entities[hw_ip][ring] == NULL) {
		r = amdgpu_ctx_init_entity(ctx, hw_ip, ring);
		if (r)
			return r;
	}

	*entity = &ctx->entities[hw_ip][ring]->entity;
	return 0;
}

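/* Allocate a new context and publish it in the per-file handle IDR */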
static int amdgpu_ctx_alloc(struct amdgpu_device *adev,
			    struct amdgpu_fpriv *fpriv,
			    struct drm_file *filp,
			    int32_t priority,
			    uint32_t *id)
{
	struct amdgpu_ctx_mgr *mgr = &fpriv->ctx_mgr;
	struct amdgpu_ctx *ctx;
	int r;

	ctx = kmalloc(sizeof(*ctx), GFP_KERNEL);
	if (!ctx)
		return -ENOMEM;

	mutex_lock(&mgr->lock);
	r = idr_alloc(&mgr->ctx_handles, ctx, 1, AMDGPU_VM_MAX_NUM_CTX, GFP_KERNEL);
	if (r < 0) {
		mutex_unlock(&mgr->lock);
		kfree(ctx);
		return r;
	}

	*id = (uint32_t)r;
	r = amdgpu_ctx_init(mgr, priority, filp, ctx);
	if (r) {
		idr_remove(&mgr->ctx_handles, *id);
		*id = 0;
		kfree(ctx);
	}
	mutex_unlock(&mgr->lock);
	return r;
}

static void amdgpu_ctx_do_release(struct kref *ref)
{
	struct amdgpu_ctx *ctx;
	u32 i, j;

	ctx = container_of(ref, struct amdgpu_ctx, refcount);
	for (i = 0; i < AMDGPU_HW_IP_NUM; ++i) {
		for (j = 0; j < amdgpu_ctx_num_entities[i]; ++j) {
			if (!ctx->entities[i][j])
				continue;

			drm_sched_entity_destroy(&ctx->entities[i][j]->entity);
		}
	}

	amdgpu_ctx_fini(ref);
}

static int amdgpu_ctx_free(struct amdgpu_fpriv *fpriv, uint32_t id)
{
	struct amdgpu_ctx_mgr *mgr = &fpriv->ctx_mgr;
	struct amdgpu_ctx *ctx;

	mutex_lock(&mgr->lock);
	ctx = idr_remove(&mgr->ctx_handles, id);
	if (ctx)
		kref_put(&ctx->refcount, amdgpu_ctx_do_release);
	mutex_unlock(&mgr->lock);
	return ctx ? 0 : -EINVAL;
}

static int amdgpu_ctx_query(struct amdgpu_device *adev,
			    struct amdgpu_fpriv *fpriv, uint32_t id,
			    union drm_amdgpu_ctx_out *out)
{
	struct amdgpu_ctx *ctx;
	struct amdgpu_ctx_mgr *mgr;
	unsigned int reset_counter;

	if (!fpriv)
		return -EINVAL;

	mgr = &fpriv->ctx_mgr;
	mutex_lock(&mgr->lock);
	ctx = idr_find(&mgr->ctx_handles, id);
	if (!ctx) {
		mutex_unlock(&mgr->lock);
		return -EINVAL;
	}

	/* TODO: these two are always zero */
	out->state.flags = 0x0;
	out->state.hangs = 0x0;

	/* determine if a GPU reset has occurred since the last call */
	reset_counter = atomic_read(&adev->gpu_reset_counter);
	/* TODO: this should ideally return NO, GUILTY, or INNOCENT. */
	if (ctx->reset_counter_query == reset_counter)
		out->state.reset_status = AMDGPU_CTX_NO_RESET;
	else
		out->state.reset_status = AMDGPU_CTX_UNKNOWN_RESET;
	ctx->reset_counter_query = reset_counter;

	mutex_unlock(&mgr->lock);
	return 0;
}

#define AMDGPU_RAS_COUNTE_DELAY_MS 3000

static int amdgpu_ctx_query2(struct amdgpu_device *adev,
			     struct amdgpu_fpriv *fpriv, uint32_t id,
			     union drm_amdgpu_ctx_out *out)
{
	struct amdgpu_ras *con = amdgpu_ras_get_context(adev);
	struct amdgpu_ctx *ctx;
	struct amdgpu_ctx_mgr *mgr;

	if (!fpriv)
		return -EINVAL;

	mgr = &fpriv->ctx_mgr;
	mutex_lock(&mgr->lock);
	ctx = idr_find(&mgr->ctx_handles, id);
	if (!ctx) {
		mutex_unlock(&mgr->lock);
		return -EINVAL;
	}

	out->state.flags = 0x0;
	out->state.hangs = 0x0;

	if (ctx->reset_counter != atomic_read(&adev->gpu_reset_counter))
		out->state.flags |= AMDGPU_CTX_QUERY2_FLAGS_RESET;

	if (ctx->vram_lost_counter != atomic_read(&adev->vram_lost_counter))
		out->state.flags |= AMDGPU_CTX_QUERY2_FLAGS_VRAMLOST;

	if (atomic_read(&ctx->guilty))
		out->state.flags |= AMDGPU_CTX_QUERY2_FLAGS_GUILTY;

	if (amdgpu_in_reset(adev))
		out->state.flags |= AMDGPU_CTX_QUERY2_FLAGS_RESET_IN_PROGRESS;

	if (adev->ras_enabled && con) {
		/* Return the cached values in O(1) and schedule delayed
		 * work to cache new values.
		 */
		int ce_count, ue_count;

		ce_count = atomic_read(&con->ras_ce_count);
		ue_count = atomic_read(&con->ras_ue_count);

		if (ce_count != ctx->ras_counter_ce) {
			ctx->ras_counter_ce = ce_count;
			out->state.flags |= AMDGPU_CTX_QUERY2_FLAGS_RAS_CE;
		}

		if (ue_count != ctx->ras_counter_ue) {
			ctx->ras_counter_ue = ue_count;
			out->state.flags |= AMDGPU_CTX_QUERY2_FLAGS_RAS_UE;
		}

		schedule_delayed_work(&con->ras_counte_delay_work,
				      msecs_to_jiffies(AMDGPU_RAS_COUNTE_DELAY_MS));
	}

	mutex_unlock(&mgr->lock);
	return 0;
}

static int amdgpu_ctx_stable_pstate(struct amdgpu_device *adev,
				    struct amdgpu_fpriv *fpriv, uint32_t id,
				    bool set, u32 *stable_pstate)
{
	struct amdgpu_ctx *ctx;
	struct amdgpu_ctx_mgr *mgr;
	int r;

	if (!fpriv)
		return -EINVAL;

	mgr = &fpriv->ctx_mgr;
	mutex_lock(&mgr->lock);
	ctx = idr_find(&mgr->ctx_handles, id);
	if (!ctx) {
		mutex_unlock(&mgr->lock);
		return -EINVAL;
	}

	if (set)
		r = amdgpu_ctx_set_stable_pstate(ctx, *stable_pstate);
	else
		r = amdgpu_ctx_get_stable_pstate(ctx, stable_pstate);

	mutex_unlock(&mgr->lock);
	return r;
}

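/*
 * Userspace entry point for context management (DRM_AMDGPU_CTX). Roughly, a
 * caller is expected to do something like the following (an illustrative
 * sketch using libdrm's drmCommandWriteRead(), not code from this driver):
 *
 *	union drm_amdgpu_ctx args = { 0 };
 *
 *	args.in.op = AMDGPU_CTX_OP_ALLOC_CTX;
 *	args.in.priority = AMDGPU_CTX_PRIORITY_NORMAL;
 *	if (drmCommandWriteRead(fd, DRM_AMDGPU_CTX, &args, sizeof(args)) == 0)
 *		ctx_id = args.out.alloc.ctx_id;
 */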
int amdgpu_ctx_ioctl(struct drm_device *dev, void *data,
		     struct drm_file *filp)
{
	int r;
	uint32_t id, stable_pstate;
	int32_t priority;

	union drm_amdgpu_ctx *args = data;
	struct amdgpu_device *adev = drm_to_adev(dev);
	struct amdgpu_fpriv *fpriv = filp->driver_priv;

	id = args->in.ctx_id;
	priority = args->in.priority;

	/* For backwards compatibility reasons, we need to accept
	 * ioctls with garbage in the priority field.
	 */
	if (!amdgpu_ctx_priority_is_valid(priority))
		priority = AMDGPU_CTX_PRIORITY_NORMAL;

	switch (args->in.op) {
	case AMDGPU_CTX_OP_ALLOC_CTX:
		r = amdgpu_ctx_alloc(adev, fpriv, filp, priority, &id);
		args->out.alloc.ctx_id = id;
		break;
	case AMDGPU_CTX_OP_FREE_CTX:
		r = amdgpu_ctx_free(fpriv, id);
		break;
	case AMDGPU_CTX_OP_QUERY_STATE:
		r = amdgpu_ctx_query(adev, fpriv, id, &args->out);
		break;
	case AMDGPU_CTX_OP_QUERY_STATE2:
		r = amdgpu_ctx_query2(adev, fpriv, id, &args->out);
		break;
	case AMDGPU_CTX_OP_GET_STABLE_PSTATE:
		if (args->in.flags)
			return -EINVAL;
		r = amdgpu_ctx_stable_pstate(adev, fpriv, id, false, &stable_pstate);
		if (!r)
			args->out.pstate.flags = stable_pstate;
		break;
	case AMDGPU_CTX_OP_SET_STABLE_PSTATE:
		if (args->in.flags & ~AMDGPU_CTX_STABLE_PSTATE_FLAGS_MASK)
			return -EINVAL;
		stable_pstate = args->in.flags & AMDGPU_CTX_STABLE_PSTATE_FLAGS_MASK;
		if (stable_pstate > AMDGPU_CTX_STABLE_PSTATE_PEAK)
			return -EINVAL;
		r = amdgpu_ctx_stable_pstate(adev, fpriv, id, true, &stable_pstate);
		break;
	default:
		return -EINVAL;
	}

	return r;
}

struct amdgpu_ctx *amdgpu_ctx_get(struct amdgpu_fpriv *fpriv, uint32_t id)
{
	struct amdgpu_ctx *ctx;
	struct amdgpu_ctx_mgr *mgr;

	if (!fpriv)
		return NULL;

	mgr = &fpriv->ctx_mgr;

	mutex_lock(&mgr->lock);
	ctx = idr_find(&mgr->ctx_handles, id);
	if (ctx)
		kref_get(&ctx->refcount);
	mutex_unlock(&mgr->lock);
	return ctx;
}

int amdgpu_ctx_put(struct amdgpu_ctx *ctx)
{
	if (ctx == NULL)
		return -EINVAL;

	kref_put(&ctx->refcount, amdgpu_ctx_do_release);
	return 0;
}

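/*
 * Remember the last amdgpu_sched_jobs fences of an entity in a ring buffer
 * and return the sequence number assigned to the new fence. The slot that
 * gets overwritten is accounted to the per-IP usage statistics.
 */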
uint64_t amdgpu_ctx_add_fence(struct amdgpu_ctx *ctx,
			      struct drm_sched_entity *entity,
			      struct dma_fence *fence)
{
	struct amdgpu_ctx_entity *centity = to_amdgpu_ctx_entity(entity);
	uint64_t seq = centity->sequence;
	struct dma_fence *other = NULL;
	unsigned int idx;

	idx = seq & (amdgpu_sched_jobs - 1);
	other = centity->fences[idx];
	WARN_ON(other && !dma_fence_is_signaled(other));

	dma_fence_get(fence);

	spin_lock(&ctx->ring_lock);
	centity->fences[idx] = fence;
	centity->sequence++;
	spin_unlock(&ctx->ring_lock);

	atomic64_add(ktime_to_ns(amdgpu_ctx_fence_time(other)),
		     &ctx->mgr->time_spend[centity->hw_ip]);

	dma_fence_put(other);
	return seq;
}

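/*
 * Look up a submission fence by sequence number. A sequence of ~0ull means
 * "the last submitted fence". Returns NULL when the fence is older than the
 * ring buffer (it has already been overwritten and is assumed signaled), or
 * an error pointer for a sequence number that has not been submitted yet.
 */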
struct dma_fence *amdgpu_ctx_get_fence(struct amdgpu_ctx *ctx,
				       struct drm_sched_entity *entity,
				       uint64_t seq)
{
	struct amdgpu_ctx_entity *centity = to_amdgpu_ctx_entity(entity);
	struct dma_fence *fence;

	spin_lock(&ctx->ring_lock);

	if (seq == ~0ull)
		seq = centity->sequence - 1;

	if (seq >= centity->sequence) {
		spin_unlock(&ctx->ring_lock);
		return ERR_PTR(-EINVAL);
	}

	if (seq + amdgpu_sched_jobs < centity->sequence) {
		spin_unlock(&ctx->ring_lock);
		return NULL;
	}

	fence = dma_fence_get(centity->fences[seq & (amdgpu_sched_jobs - 1)]);
	spin_unlock(&ctx->ring_lock);

	return fence;
}

static void amdgpu_ctx_set_entity_priority(struct amdgpu_ctx *ctx,
					   struct amdgpu_ctx_entity *aentity,
					   int hw_ip,
					   int32_t priority)
{
	struct amdgpu_device *adev = ctx->mgr->adev;
	unsigned int hw_prio;
	struct drm_gpu_scheduler **scheds = NULL;
	unsigned int num_scheds;

	/* set sw priority */
	drm_sched_entity_set_priority(&aentity->entity,
				      amdgpu_ctx_to_drm_sched_prio(priority));

	/* set hw priority */
	if (hw_ip == AMDGPU_HW_IP_COMPUTE || hw_ip == AMDGPU_HW_IP_GFX) {
		hw_prio = amdgpu_ctx_get_hw_prio(ctx, hw_ip);
		hw_prio = array_index_nospec(hw_prio, AMDGPU_RING_PRIO_MAX);
		scheds = adev->gpu_sched[hw_ip][hw_prio].sched;
		num_scheds = adev->gpu_sched[hw_ip][hw_prio].num_scheds;
		drm_sched_entity_modify_sched(&aentity->entity, scheds,
					      num_scheds);
	}
}

void amdgpu_ctx_priority_override(struct amdgpu_ctx *ctx,
				  int32_t priority)
{
	int32_t ctx_prio;
	unsigned int i, j;

	ctx->override_priority = priority;

	ctx_prio = (ctx->override_priority == AMDGPU_CTX_PRIORITY_UNSET) ?
			ctx->init_priority : ctx->override_priority;
	for (i = 0; i < AMDGPU_HW_IP_NUM; ++i) {
		for (j = 0; j < amdgpu_ctx_num_entities[i]; ++j) {
			if (!ctx->entities[i][j])
				continue;

			amdgpu_ctx_set_entity_priority(ctx, ctx->entities[i][j],
						       i, ctx_prio);
		}
	}
}

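/*
 * Wait for the fence occupying the ring-buffer slot that the next submission
 * on this entity will reuse. This limits userspace to at most
 * amdgpu_sched_jobs outstanding submissions per entity.
 */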
int amdgpu_ctx_wait_prev_fence(struct amdgpu_ctx *ctx,
			       struct drm_sched_entity *entity)
{
	struct amdgpu_ctx_entity *centity = to_amdgpu_ctx_entity(entity);
	struct dma_fence *other;
	unsigned int idx;
	long r;

	spin_lock(&ctx->ring_lock);
	idx = centity->sequence & (amdgpu_sched_jobs - 1);
	other = dma_fence_get(centity->fences[idx]);
	spin_unlock(&ctx->ring_lock);

	if (!other)
		return 0;

	r = dma_fence_wait(other, true);
	if (r < 0 && r != -ERESTARTSYS)
		DRM_ERROR("Error (%ld) waiting for fence!\n", r);

	dma_fence_put(other);
	return r;
}

void amdgpu_ctx_mgr_init(struct amdgpu_ctx_mgr *mgr,
			 struct amdgpu_device *adev)
{
	unsigned int i;

	mgr->adev = adev;
	mutex_init(&mgr->lock);
	idr_init_base(&mgr->ctx_handles, 1);

	for (i = 0; i < AMDGPU_HW_IP_NUM; ++i)
		atomic64_set(&mgr->time_spend[i], 0);
}

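/*
 * Flush all entities of all contexts belonging to this file, giving
 * in-flight jobs up to @timeout to complete. Returns the remaining timeout.
 */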
long amdgpu_ctx_mgr_entity_flush(struct amdgpu_ctx_mgr *mgr, long timeout)
{
	struct amdgpu_ctx *ctx;
	struct idr *idp;
	uint32_t id, i, j;

	idp = &mgr->ctx_handles;

	mutex_lock(&mgr->lock);
	idr_for_each_entry(idp, ctx, id) {
		for (i = 0; i < AMDGPU_HW_IP_NUM; ++i) {
			for (j = 0; j < amdgpu_ctx_num_entities[i]; ++j) {
				struct drm_sched_entity *entity;

				if (!ctx->entities[i][j])
					continue;

				entity = &ctx->entities[i][j]->entity;
				timeout = drm_sched_entity_flush(entity, timeout);
			}
		}
	}
	mutex_unlock(&mgr->lock);
	return timeout;
}

void amdgpu_ctx_mgr_entity_fini(struct amdgpu_ctx_mgr *mgr)
{
	struct amdgpu_ctx *ctx;
	struct idr *idp;
	uint32_t id, i, j;

	idp = &mgr->ctx_handles;

	idr_for_each_entry(idp, ctx, id) {
		if (kref_read(&ctx->refcount) != 1) {
			DRM_ERROR("ctx %p is still alive\n", ctx);
			continue;
		}

		for (i = 0; i < AMDGPU_HW_IP_NUM; ++i) {
			for (j = 0; j < amdgpu_ctx_num_entities[i]; ++j) {
				struct drm_sched_entity *entity;

				if (!ctx->entities[i][j])
					continue;

				entity = &ctx->entities[i][j]->entity;
				drm_sched_entity_fini(entity);
			}
		}
	}
}

void amdgpu_ctx_mgr_fini(struct amdgpu_ctx_mgr *mgr)
{
	struct amdgpu_ctx *ctx;
	struct idr *idp;
	uint32_t id;

	amdgpu_ctx_mgr_entity_fini(mgr);

	idp = &mgr->ctx_handles;

	idr_for_each_entry(idp, ctx, id) {
		if (kref_put(&ctx->refcount, amdgpu_ctx_fini) != 1)
			DRM_ERROR("ctx %p is still alive\n", ctx);
	}

	idr_destroy(&mgr->ctx_handles);
	mutex_destroy(&mgr->lock);
}

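/*
 * Report the accumulated GPU time per HW IP for one file descriptor: the
 * time already folded into mgr->time_spend plus the time of the fences still
 * remembered by the live entities.
 */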
void amdgpu_ctx_mgr_usage(struct amdgpu_ctx_mgr *mgr,
			  ktime_t usage[AMDGPU_HW_IP_NUM])
{
	struct amdgpu_ctx *ctx;
	unsigned int hw_ip, i;
	uint32_t id;

	/*
	 * This is a little bit racy because a ctx or a fence can be destroyed
	 * just as we try to account it. But that is OK since exactly this
	 * case is explicitly allowed by the interface.
	 */
	mutex_lock(&mgr->lock);
	for (hw_ip = 0; hw_ip < AMDGPU_HW_IP_NUM; ++hw_ip) {
		uint64_t ns = atomic64_read(&mgr->time_spend[hw_ip]);

		usage[hw_ip] = ns_to_ktime(ns);
	}

	idr_for_each_entry(&mgr->ctx_handles, ctx, id) {
		for (hw_ip = 0; hw_ip < AMDGPU_HW_IP_NUM; ++hw_ip) {
			for (i = 0; i < amdgpu_ctx_num_entities[hw_ip]; ++i) {
				struct amdgpu_ctx_entity *centity;
				ktime_t spend;

				centity = ctx->entities[hw_ip][i];
				if (!centity)
					continue;
				spend = amdgpu_ctx_entity_time(ctx, centity);
				usage[hw_ip] = ktime_add(usage[hw_ip], spend);
			}
		}
	}
	mutex_unlock(&mgr->lock);
}